gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fetch_impl.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2014 ARM Limited
3  * Copyright (c) 2012-2013 AMD
4  * All rights reserved.
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  *
41  * Authors: Kevin Lim
42  * Korey Sewell
43  */
44 
45 #ifndef __CPU_O3_FETCH_IMPL_HH__
46 #define __CPU_O3_FETCH_IMPL_HH__
47 
48 #include <algorithm>
49 #include <cstring>
50 #include <list>
51 #include <map>
52 #include <queue>
53 
54 #include "arch/isa_traits.hh"
55 #include "arch/tlb.hh"
56 #include "arch/utility.hh"
57 #include "arch/vtophys.hh"
58 #include "base/random.hh"
59 #include "base/types.hh"
60 #include "config/the_isa.hh"
61 #include "cpu/base.hh"
62 //#include "cpu/checker/cpu.hh"
63 #include "cpu/o3/fetch.hh"
64 #include "cpu/exetrace.hh"
65 #include "debug/Activity.hh"
66 #include "debug/Drain.hh"
67 #include "debug/Fetch.hh"
68 #include "debug/O3PipeView.hh"
69 #include "mem/packet.hh"
70 #include "params/DerivO3CPU.hh"
71 #include "sim/byteswap.hh"
72 #include "sim/core.hh"
73 #include "sim/eventq.hh"
74 #include "sim/full_system.hh"
75 #include "sim/system.hh"
76 #include "cpu/o3/isa_specific.hh"
77 
78 using namespace std;
79 
80 template<class Impl>
81 DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
82  : cpu(_cpu),
83  decodeToFetchDelay(params->decodeToFetchDelay),
84  renameToFetchDelay(params->renameToFetchDelay),
85  iewToFetchDelay(params->iewToFetchDelay),
86  commitToFetchDelay(params->commitToFetchDelay),
87  fetchWidth(params->fetchWidth),
88  decodeWidth(params->decodeWidth),
89  retryPkt(NULL),
90  retryTid(InvalidThreadID),
91  cacheBlkSize(cpu->cacheLineSize()),
92  fetchBufferSize(params->fetchBufferSize),
93  fetchBufferMask(fetchBufferSize - 1),
94  fetchQueueSize(params->fetchQueueSize),
95  numThreads(params->numThreads),
96  numFetchingThreads(params->smtNumFetchingThreads),
97  finishTranslationEvent(this)
98 {
99  if (numThreads > Impl::MaxThreads)
100  fatal("numThreads (%d) is larger than compiled limit (%d),\n"
101  "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
102  numThreads, static_cast<int>(Impl::MaxThreads));
103  if (fetchWidth > Impl::MaxWidth)
104  fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
105  "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
106  fetchWidth, static_cast<int>(Impl::MaxWidth));
108  fatal("fetch buffer size (%u bytes) is greater than the cache "
109  "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
111  fatal("cache block (%u bytes) is not a multiple of the "
112  "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
113 
114  std::string policy = params->smtFetchPolicy;
115 
116  // Convert string to lowercase
117  std::transform(policy.begin(), policy.end(), policy.begin(),
118  (int(*)(int)) tolower);
119 
120  // Figure out fetch policy
121  if (policy == "singlethread") {
123  if (numThreads > 1)
124  panic("Invalid Fetch Policy for a SMT workload.");
125  } else if (policy == "roundrobin") {
127  DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
128  } else if (policy == "branch") {
130  DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
131  } else if (policy == "iqcount") {
132  fetchPolicy = IQ;
133  DPRINTF(Fetch, "Fetch policy set to IQ count\n");
134  } else if (policy == "lsqcount") {
135  fetchPolicy = LSQ;
136  DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
137  } else {
138  fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
139  " RoundRobin,LSQcount,IQcount}\n");
140  }
141 
142  // Get the size of an instruction.
143  instSize = sizeof(TheISA::MachInst);
144 
145  for (int i = 0; i < Impl::MaxThreads; i++) {
146  decoder[i] = NULL;
147  fetchBuffer[i] = NULL;
148  fetchBufferPC[i] = 0;
149  fetchBufferValid[i] = false;
150  }
151 
152  branchPred = params->branchPred;
153 
154  for (ThreadID tid = 0; tid < numThreads; tid++) {
155  decoder[tid] = new TheISA::Decoder(params->isa[tid]);
156  // Create space to buffer the cache line data,
157  // which may not hold the entire cache line.
158  fetchBuffer[tid] = new uint8_t[fetchBufferSize];
159  }
160 }
161 
162 template <class Impl>
163 std::string
165 {
166  return cpu->name() + ".fetch";
167 }
168 
169 template <class Impl>
170 void
172 {
173  ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
174  ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
175  "FetchRequest");
176 
177 }
178 
179 template <class Impl>
180 void
182 {
183  icacheStallCycles
184  .name(name() + ".icacheStallCycles")
185  .desc("Number of cycles fetch is stalled on an Icache miss")
186  .prereq(icacheStallCycles);
187 
188  fetchedInsts
189  .name(name() + ".Insts")
190  .desc("Number of instructions fetch has processed")
191  .prereq(fetchedInsts);
192 
193  fetchedBranches
194  .name(name() + ".Branches")
195  .desc("Number of branches that fetch encountered")
196  .prereq(fetchedBranches);
197 
198  predictedBranches
199  .name(name() + ".predictedBranches")
200  .desc("Number of branches that fetch has predicted taken")
201  .prereq(predictedBranches);
202 
203  fetchCycles
204  .name(name() + ".Cycles")
205  .desc("Number of cycles fetch has run and was not squashing or"
206  " blocked")
207  .prereq(fetchCycles);
208 
209  fetchSquashCycles
210  .name(name() + ".SquashCycles")
211  .desc("Number of cycles fetch has spent squashing")
212  .prereq(fetchSquashCycles);
213 
214  fetchTlbCycles
215  .name(name() + ".TlbCycles")
216  .desc("Number of cycles fetch has spent waiting for tlb")
217  .prereq(fetchTlbCycles);
218 
219  fetchIdleCycles
220  .name(name() + ".IdleCycles")
221  .desc("Number of cycles fetch was idle")
222  .prereq(fetchIdleCycles);
223 
224  fetchBlockedCycles
225  .name(name() + ".BlockedCycles")
226  .desc("Number of cycles fetch has spent blocked")
227  .prereq(fetchBlockedCycles);
228 
229  fetchedCacheLines
230  .name(name() + ".CacheLines")
231  .desc("Number of cache lines fetched")
232  .prereq(fetchedCacheLines);
233 
234  fetchMiscStallCycles
235  .name(name() + ".MiscStallCycles")
236  .desc("Number of cycles fetch has spent waiting on interrupts, or "
237  "bad addresses, or out of MSHRs")
238  .prereq(fetchMiscStallCycles);
239 
240  fetchPendingDrainCycles
241  .name(name() + ".PendingDrainCycles")
242  .desc("Number of cycles fetch has spent waiting on pipes to drain")
243  .prereq(fetchPendingDrainCycles);
244 
245  fetchNoActiveThreadStallCycles
246  .name(name() + ".NoActiveThreadStallCycles")
247  .desc("Number of stall cycles due to no active thread to fetch from")
248  .prereq(fetchNoActiveThreadStallCycles);
249 
250  fetchPendingTrapStallCycles
251  .name(name() + ".PendingTrapStallCycles")
252  .desc("Number of stall cycles due to pending traps")
253  .prereq(fetchPendingTrapStallCycles);
254 
255  fetchPendingQuiesceStallCycles
256  .name(name() + ".PendingQuiesceStallCycles")
257  .desc("Number of stall cycles due to pending quiesce instructions")
258  .prereq(fetchPendingQuiesceStallCycles);
259 
260  fetchIcacheWaitRetryStallCycles
261  .name(name() + ".IcacheWaitRetryStallCycles")
262  .desc("Number of stall cycles due to full MSHR")
263  .prereq(fetchIcacheWaitRetryStallCycles);
264 
265  fetchIcacheSquashes
266  .name(name() + ".IcacheSquashes")
267  .desc("Number of outstanding Icache misses that were squashed")
268  .prereq(fetchIcacheSquashes);
269 
270  fetchTlbSquashes
271  .name(name() + ".ItlbSquashes")
272  .desc("Number of outstanding ITLB misses that were squashed")
273  .prereq(fetchTlbSquashes);
274 
275  fetchNisnDist
276  .init(/* base value */ 0,
277  /* last value */ fetchWidth,
278  /* bucket size */ 1)
279  .name(name() + ".rateDist")
280  .desc("Number of instructions fetched each cycle (Total)")
281  .flags(Stats::pdf);
282 
283  idleRate
284  .name(name() + ".idleRate")
285  .desc("Percent of cycles fetch was idle")
286  .prereq(idleRate);
287  idleRate = fetchIdleCycles * 100 / cpu->numCycles;
288 
289  branchRate
290  .name(name() + ".branchRate")
291  .desc("Number of branch fetches per cycle")
292  .flags(Stats::total);
293  branchRate = fetchedBranches / cpu->numCycles;
294 
295  fetchRate
296  .name(name() + ".rate")
297  .desc("Number of inst fetches per cycle")
298  .flags(Stats::total);
299  fetchRate = fetchedInsts / cpu->numCycles;
300 }
301 
302 template<class Impl>
303 void
305 {
306  timeBuffer = time_buffer;
307 
308  // Create wires to get information from proper places in time buffer.
309  fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
310  fromRename = timeBuffer->getWire(-renameToFetchDelay);
311  fromIEW = timeBuffer->getWire(-iewToFetchDelay);
312  fromCommit = timeBuffer->getWire(-commitToFetchDelay);
313 }
314 
315 template<class Impl>
316 void
318 {
319  activeThreads = at_ptr;
320 }
321 
322 template<class Impl>
323 void
325 {
326  // Create wire to write information to proper place in fetch time buf.
327  toDecode = ftb_ptr->getWire(0);
328 }
329 
330 template<class Impl>
331 void
333 {
334  assert(priorityList.empty());
335  resetStage();
336 
337  // Fetch needs to start fetching instructions at the very beginning,
338  // so it must start up in active state.
339  switchToActive();
340 }
341 
342 template<class Impl>
343 void
345 {
346  numInst = 0;
347  interruptPending = false;
348  cacheBlocked = false;
349 
350  priorityList.clear();
351 
352  // Setup PC and nextPC with initial state.
353  for (ThreadID tid = 0; tid < numThreads; ++tid) {
354  fetchStatus[tid] = Running;
355  pc[tid] = cpu->pcState(tid);
356  fetchOffset[tid] = 0;
357  macroop[tid] = NULL;
358 
359  delayedCommit[tid] = false;
360  memReq[tid] = NULL;
361 
362  stalls[tid].decode = false;
363  stalls[tid].drain = false;
364 
365  fetchBufferPC[tid] = 0;
366  fetchBufferValid[tid] = false;
367 
368  fetchQueue[tid].clear();
369 
370  priorityList.push_back(tid);
371  }
372 
373  wroteToTimeBuffer = false;
374  _status = Inactive;
375 }
376 
377 template<class Impl>
378 void
380 {
381  ThreadID tid = cpu->contextToThread(pkt->req->contextId());
382 
383  DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);
384  assert(!cpu->switchedOut());
385 
386  // Only change the status if it's still waiting on the icache access
387  // to return.
388  if (fetchStatus[tid] != IcacheWaitResponse ||
389  pkt->req != memReq[tid]) {
390  ++fetchIcacheSquashes;
391  delete pkt->req;
392  delete pkt;
393  return;
394  }
395 
396  memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
397  fetchBufferValid[tid] = true;
398 
399  // Wake up the CPU (if it went to sleep and was waiting on
400  // this completion event).
401  cpu->wakeCPU();
402 
403  DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
404  tid);
405 
406  switchToActive();
407 
408  // Only switch to IcacheAccessComplete if we're not stalled as well.
409  if (checkStall(tid)) {
410  fetchStatus[tid] = Blocked;
411  } else {
412  fetchStatus[tid] = IcacheAccessComplete;
413  }
414 
415  pkt->req->setAccessLatency();
416  cpu->ppInstAccessComplete->notify(pkt);
417  // Reset the mem req to NULL.
418  delete pkt->req;
419  delete pkt;
420  memReq[tid] = NULL;
421 }
422 
423 template <class Impl>
424 void
426 {
427  for (ThreadID i = 0; i < numThreads; ++i) {
428  stalls[i].decode = false;
429  stalls[i].drain = false;
430  }
431 }
432 
433 template <class Impl>
434 void
436 {
437  assert(isDrained());
438  assert(retryPkt == NULL);
439  assert(retryTid == InvalidThreadID);
440  assert(!cacheBlocked);
441  assert(!interruptPending);
442 
443  for (ThreadID i = 0; i < numThreads; ++i) {
444  assert(!memReq[i]);
445  assert(fetchStatus[i] == Idle || stalls[i].drain);
446  }
447 
448  branchPred->drainSanityCheck();
449 }
450 
451 template <class Impl>
452 bool
454 {
455  /* Make sure that threads are either idle of that the commit stage
456  * has signaled that draining has completed by setting the drain
457  * stall flag. This effectively forces the pipeline to be disabled
458  * until the whole system is drained (simulation may continue to
459  * drain other components).
460  */
461  for (ThreadID i = 0; i < numThreads; ++i) {
462  // Verify fetch queues are drained
463  if (!fetchQueue[i].empty())
464  return false;
465 
466  // Return false if not idle or drain stalled
467  if (fetchStatus[i] != Idle) {
468  if (fetchStatus[i] == Blocked && stalls[i].drain)
469  continue;
470  else
471  return false;
472  }
473  }
474 
475  /* The pipeline might start up again in the middle of the drain
476  * cycle if the finish translation event is scheduled, so make
477  * sure that's not the case.
478  */
479  return !finishTranslationEvent.scheduled();
480 }
481 
482 template <class Impl>
483 void
485 {
486  assert(cpu->getInstPort().isConnected());
487  resetStage();
488 
489 }
490 
491 template <class Impl>
492 void
494 {
495  assert(cpu->isDraining());
496  assert(!stalls[tid].drain);
497  DPRINTF(Drain, "%i: Thread drained.\n", tid);
498  stalls[tid].drain = true;
499 }
500 
501 template <class Impl>
502 void
504 {
505  DPRINTF(Fetch, "Waking up from quiesce\n");
506  // Hopefully this is safe
507  // @todo: Allow other threads to wake from quiesce.
508  fetchStatus[0] = Running;
509 }
510 
511 template <class Impl>
512 inline void
514 {
515  if (_status == Inactive) {
516  DPRINTF(Activity, "Activating stage.\n");
517 
518  cpu->activateStage(O3CPU::FetchIdx);
519 
520  _status = Active;
521  }
522 }
523 
524 template <class Impl>
525 inline void
527 {
528  if (_status == Active) {
529  DPRINTF(Activity, "Deactivating stage.\n");
530 
531  cpu->deactivateStage(O3CPU::FetchIdx);
532 
533  _status = Inactive;
534  }
535 }
536 
537 template <class Impl>
538 void
540 {
541  // Update priority list
542  auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
543  if (thread_it != priorityList.end()) {
544  priorityList.erase(thread_it);
545  }
546 }
547 
548 template <class Impl>
549 bool
551  DynInstPtr &inst, TheISA::PCState &nextPC)
552 {
553  // Do branch prediction check here.
554  // A bit of a misnomer...next_PC is actually the current PC until
555  // this function updates it.
556  bool predict_taken;
557 
558  if (!inst->isControl()) {
559  TheISA::advancePC(nextPC, inst->staticInst);
560  inst->setPredTarg(nextPC);
561  inst->setPredTaken(false);
562  return false;
563  }
564 
565  ThreadID tid = inst->threadNumber;
566  predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
567  nextPC, tid);
568 
569  if (predict_taken) {
570  DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %s.\n",
571  tid, inst->seqNum, nextPC);
572  } else {
573  DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
574  tid, inst->seqNum);
575  }
576 
577  DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
578  tid, inst->seqNum, nextPC);
579  inst->setPredTarg(nextPC);
580  inst->setPredTaken(predict_taken);
581 
582  ++fetchedBranches;
583 
584  if (predict_taken) {
585  ++predictedBranches;
586  }
587 
588  return predict_taken;
589 }
590 
591 template <class Impl>
592 bool
594 {
595  Fault fault = NoFault;
596 
597  assert(!cpu->switchedOut());
598 
599  // @todo: not sure if these should block translation.
600  //AlphaDep
601  if (cacheBlocked) {
602  DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
603  tid);
604  return false;
605  } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
606  // Hold off fetch from getting new instructions when:
607  // Cache is blocked, or
608  // while an interrupt is pending and we're not in PAL mode, or
609  // fetch is switched out.
610  DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
611  tid);
612  return false;
613  }
614 
615  // Align the fetch address to the start of a fetch buffer segment.
616  Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
617 
618  DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
619  tid, fetchBufferBlockPC, vaddr);
620 
621  // Setup the memReq to do a read of the first instruction's address.
622  // Set the appropriate read size and flags as well.
623  // Build request here.
624  RequestPtr mem_req =
625  new Request(tid, fetchBufferBlockPC, fetchBufferSize,
626  Request::INST_FETCH, cpu->instMasterId(), pc,
627  cpu->thread[tid]->contextId());
628 
629  mem_req->taskId(cpu->taskId());
630 
631  memReq[tid] = mem_req;
632 
633  // Initiate translation of the icache block
634  fetchStatus[tid] = ItlbWait;
635  FetchTranslation *trans = new FetchTranslation(this);
636  cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
637  trans, BaseTLB::Execute);
638  return true;
639 }
640 
641 template <class Impl>
642 void
644 {
645  ThreadID tid = cpu->contextToThread(mem_req->contextId());
646  Addr fetchBufferBlockPC = mem_req->getVaddr();
647 
648  assert(!cpu->switchedOut());
649 
650  // Wake up CPU if it was idle
651  cpu->wakeCPU();
652 
653  if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
654  mem_req->getVaddr() != memReq[tid]->getVaddr()) {
655  DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
656  tid);
657  ++fetchTlbSquashes;
658  delete mem_req;
659  return;
660  }
661 
662 
663  // If translation was successful, attempt to read the icache block.
664  if (fault == NoFault) {
665  // Check that we're not going off into random memory
666  // If we have, just wait around for commit to squash something and put
667  // us on the right track
668  if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
669  warn("Address %#x is outside of physical memory, stopping fetch\n",
670  mem_req->getPaddr());
671  fetchStatus[tid] = NoGoodAddr;
672  delete mem_req;
673  memReq[tid] = NULL;
674  return;
675  }
676 
677  // Build packet here.
678  PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
679  data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
680 
681  fetchBufferPC[tid] = fetchBufferBlockPC;
682  fetchBufferValid[tid] = false;
683  DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
684 
685  fetchedCacheLines++;
686 
687  // Access the cache.
688  if (!cpu->getInstPort().sendTimingReq(data_pkt)) {
689  assert(retryPkt == NULL);
690  assert(retryTid == InvalidThreadID);
691  DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
692 
693  fetchStatus[tid] = IcacheWaitRetry;
694  retryPkt = data_pkt;
695  retryTid = tid;
696  cacheBlocked = true;
697  } else {
698  DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
699  DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
700  "response.\n", tid);
701  lastIcacheStall[tid] = curTick();
702  fetchStatus[tid] = IcacheWaitResponse;
703  // Notify Fetch Request probe when a packet containing a fetch
704  // request is successfully sent
705  ppFetchRequestSent->notify(mem_req);
706  }
707  } else {
708  // Don't send an instruction to decode if we can't handle it.
709  if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
710  assert(!finishTranslationEvent.scheduled());
711  finishTranslationEvent.setFault(fault);
712  finishTranslationEvent.setReq(mem_req);
713  cpu->schedule(finishTranslationEvent,
714  cpu->clockEdge(Cycles(1)));
715  return;
716  }
717  DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
718  tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
719  // Translation faulted, icache request won't be sent.
720  delete mem_req;
721  memReq[tid] = NULL;
722 
723  // Send the fault to commit. This thread will not do anything
724  // until commit handles the fault. The only other way it can
725  // wake up is if a squash comes along and changes the PC.
726  TheISA::PCState fetchPC = pc[tid];
727 
728  DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
729  // We will use a nop in ordier to carry the fault.
730  DynInstPtr instruction = buildInst(tid,
731  decoder[tid]->decode(TheISA::NoopMachInst, fetchPC.instAddr()),
732  NULL, fetchPC, fetchPC, false);
733 
734  instruction->setPredTarg(fetchPC);
735  instruction->fault = fault;
736  wroteToTimeBuffer = true;
737 
738  DPRINTF(Activity, "Activity this cycle.\n");
739  cpu->activityThisCycle();
740 
741  fetchStatus[tid] = TrapPending;
742 
743  DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
744  DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
745  tid, fault->name(), pc[tid]);
746  }
747  _status = updateFetchStatus();
748 }
749 
750 template <class Impl>
751 inline void
753  const DynInstPtr squashInst, ThreadID tid)
754 {
755  DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
756  tid, newPC);
757 
758  pc[tid] = newPC;
759  fetchOffset[tid] = 0;
760  if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
761  macroop[tid] = squashInst->macroop;
762  else
763  macroop[tid] = NULL;
764  decoder[tid]->reset();
765 
766  // Clear the icache miss if it's outstanding.
767  if (fetchStatus[tid] == IcacheWaitResponse) {
768  DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
769  tid);
770  memReq[tid] = NULL;
771  } else if (fetchStatus[tid] == ItlbWait) {
772  DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
773  tid);
774  memReq[tid] = NULL;
775  }
776 
777  // Get rid of the retrying packet if it was from this thread.
778  if (retryTid == tid) {
779  assert(cacheBlocked);
780  if (retryPkt) {
781  delete retryPkt->req;
782  delete retryPkt;
783  }
784  retryPkt = NULL;
785  retryTid = InvalidThreadID;
786  }
787 
788  fetchStatus[tid] = Squashing;
789 
790  // Empty fetch queue
791  fetchQueue[tid].clear();
792 
793  // microops are being squashed, it is not known wheather the
794  // youngest non-squashed microop was marked delayed commit
795  // or not. Setting the flag to true ensures that the
796  // interrupts are not handled when they cannot be, though
797  // some opportunities to handle interrupts may be missed.
798  delayedCommit[tid] = true;
799 
800  ++fetchSquashCycles;
801 }
802 
803 template<class Impl>
804 void
806  const DynInstPtr squashInst,
807  const InstSeqNum seq_num, ThreadID tid)
808 {
809  DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);
810 
811  doSquash(newPC, squashInst, tid);
812 
813  // Tell the CPU to remove any instructions that are in flight between
814  // fetch and decode.
815  cpu->removeInstsUntil(seq_num, tid);
816 }
817 
818 template<class Impl>
819 bool
821 {
822  bool ret_val = false;
823 
824  if (stalls[tid].drain) {
825  assert(cpu->isDraining());
826  DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
827  ret_val = true;
828  }
829 
830  return ret_val;
831 }
832 
833 template<class Impl>
836 {
837  //Check Running
838  list<ThreadID>::iterator threads = activeThreads->begin();
839  list<ThreadID>::iterator end = activeThreads->end();
840 
841  while (threads != end) {
842  ThreadID tid = *threads++;
843 
844  if (fetchStatus[tid] == Running ||
845  fetchStatus[tid] == Squashing ||
846  fetchStatus[tid] == IcacheAccessComplete) {
847 
848  if (_status == Inactive) {
849  DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
850 
851  if (fetchStatus[tid] == IcacheAccessComplete) {
852  DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
853  "completion\n",tid);
854  }
855 
856  cpu->activateStage(O3CPU::FetchIdx);
857  }
858 
859  return Active;
860  }
861  }
862 
863  // Stage is switching from active to inactive, notify CPU of it.
864  if (_status == Active) {
865  DPRINTF(Activity, "Deactivating stage.\n");
866 
867  cpu->deactivateStage(O3CPU::FetchIdx);
868  }
869 
870  return Inactive;
871 }
872 
873 template <class Impl>
874 void
876  const InstSeqNum seq_num, DynInstPtr squashInst,
877  ThreadID tid)
878 {
879  DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);
880 
881  doSquash(newPC, squashInst, tid);
882 
883  // Tell the CPU to remove any instructions that are not in the ROB.
884  cpu->removeInstsNotInROB(tid);
885 }
886 
887 template <class Impl>
888 void
890 {
891  list<ThreadID>::iterator threads = activeThreads->begin();
892  list<ThreadID>::iterator end = activeThreads->end();
893  bool status_change = false;
894 
895  wroteToTimeBuffer = false;
896 
897  for (ThreadID i = 0; i < numThreads; ++i) {
898  issuePipelinedIfetch[i] = false;
899  }
900 
901  while (threads != end) {
902  ThreadID tid = *threads++;
903 
904  // Check the signals for each thread to determine the proper status
905  // for each thread.
906  bool updated_status = checkSignalsAndUpdate(tid);
907  status_change = status_change || updated_status;
908  }
909 
910  DPRINTF(Fetch, "Running stage.\n");
911 
912  if (FullSystem) {
913  if (fromCommit->commitInfo[0].interruptPending) {
914  interruptPending = true;
915  }
916 
917  if (fromCommit->commitInfo[0].clearInterrupt) {
918  interruptPending = false;
919  }
920  }
921 
922  for (threadFetched = 0; threadFetched < numFetchingThreads;
923  threadFetched++) {
924  // Fetch each of the actively fetching threads.
925  fetch(status_change);
926  }
927 
928  // Record number of instructions fetched this cycle for distribution.
929  fetchNisnDist.sample(numInst);
930 
931  if (status_change) {
932  // Change the fetch stage status if there was a status change.
933  _status = updateFetchStatus();
934  }
935 
936  // Issue the next I-cache request if possible.
937  for (ThreadID i = 0; i < numThreads; ++i) {
938  if (issuePipelinedIfetch[i]) {
939  pipelineIcacheAccesses(i);
940  }
941  }
942 
943  // Send instructions enqueued into the fetch queue to decode.
944  // Limit rate by fetchWidth. Stall if decode is stalled.
945  unsigned insts_to_decode = 0;
946  unsigned available_insts = 0;
947 
948  for (auto tid : *activeThreads) {
949  if (!stalls[tid].decode) {
950  available_insts += fetchQueue[tid].size();
951  }
952  }
953 
954  // Pick a random thread to start trying to grab instructions from
955  auto tid_itr = activeThreads->begin();
956  std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));
957 
958  while (available_insts != 0 && insts_to_decode < decodeWidth) {
959  ThreadID tid = *tid_itr;
960  if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
961  auto inst = fetchQueue[tid].front();
962  toDecode->insts[toDecode->size++] = inst;
963  DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
964  "fetch queue. Fetch queue size: %i.\n",
965  tid, inst->seqNum, fetchQueue[tid].size());
966 
967  wroteToTimeBuffer = true;
968  fetchQueue[tid].pop_front();
969  insts_to_decode++;
970  available_insts--;
971  }
972 
973  tid_itr++;
974  // Wrap around if at end of active threads list
975  if (tid_itr == activeThreads->end())
976  tid_itr = activeThreads->begin();
977  }
978 
979  // If there was activity this cycle, inform the CPU of it.
980  if (wroteToTimeBuffer) {
981  DPRINTF(Activity, "Activity this cycle.\n");
982  cpu->activityThisCycle();
983  }
984 
985  // Reset the number of the instruction we've fetched.
986  numInst = 0;
987 }
988 
989 template <class Impl>
990 bool
992 {
993  // Update the per thread stall statuses.
994  if (fromDecode->decodeBlock[tid]) {
995  stalls[tid].decode = true;
996  }
997 
998  if (fromDecode->decodeUnblock[tid]) {
999  assert(stalls[tid].decode);
1000  assert(!fromDecode->decodeBlock[tid]);
1001  stalls[tid].decode = false;
1002  }
1003 
1004  // Check squash signals from commit.
1005  if (fromCommit->commitInfo[tid].squash) {
1006 
1007  DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1008  "from commit.\n",tid);
1009  // In any case, squash.
1010  squash(fromCommit->commitInfo[tid].pc,
1011  fromCommit->commitInfo[tid].doneSeqNum,
1012  fromCommit->commitInfo[tid].squashInst, tid);
1013 
1014  // If it was a branch mispredict on a control instruction, update the
1015  // branch predictor with that instruction, otherwise just kill the
1016  // invalid state we generated in after sequence number
1017  if (fromCommit->commitInfo[tid].mispredictInst &&
1018  fromCommit->commitInfo[tid].mispredictInst->isControl()) {
1019  branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1020  fromCommit->commitInfo[tid].pc,
1021  fromCommit->commitInfo[tid].branchTaken,
1022  tid);
1023  } else {
1024  branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1025  tid);
1026  }
1027 
1028  return true;
1029  } else if (fromCommit->commitInfo[tid].doneSeqNum) {
1030  // Update the branch predictor if it wasn't a squashed instruction
1031  // that was broadcasted.
1032  branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
1033  }
1034 
1035  // Check squash signals from decode.
1036  if (fromDecode->decodeInfo[tid].squash) {
1037  DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1038  "from decode.\n",tid);
1039 
1040  // Update the branch predictor.
1041  if (fromDecode->decodeInfo[tid].branchMispredict) {
1042  branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1043  fromDecode->decodeInfo[tid].nextPC,
1044  fromDecode->decodeInfo[tid].branchTaken,
1045  tid);
1046  } else {
1047  branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1048  tid);
1049  }
1050 
1051  if (fetchStatus[tid] != Squashing) {
1052 
1053  DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
1054  fromDecode->decodeInfo[tid].nextPC);
1055  // Squash unless we're already squashing
1056  squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
1057  fromDecode->decodeInfo[tid].squashInst,
1058  fromDecode->decodeInfo[tid].doneSeqNum,
1059  tid);
1060 
1061  return true;
1062  }
1063  }
1064 
1065  if (checkStall(tid) &&
1066  fetchStatus[tid] != IcacheWaitResponse &&
1067  fetchStatus[tid] != IcacheWaitRetry &&
1068  fetchStatus[tid] != ItlbWait &&
1069  fetchStatus[tid] != QuiescePending) {
1070  DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
1071 
1072  fetchStatus[tid] = Blocked;
1073 
1074  return true;
1075  }
1076 
1077  if (fetchStatus[tid] == Blocked ||
1078  fetchStatus[tid] == Squashing) {
1079  // Switch status to running if fetch isn't being told to block or
1080  // squash this cycle.
1081  DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
1082  tid);
1083 
1084  fetchStatus[tid] = Running;
1085 
1086  return true;
1087  }
1088 
1089  // If we've reached this point, we have not gotten any signals that
1090  // cause fetch to change its status. Fetch remains the same as before.
1091  return false;
1092 }
1093 
template<class Impl>
typename Impl::DynInstPtr
DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
                              StaticInstPtr curMacroop, TheISA::PCState thisPC,
                              TheISA::PCState nextPC, bool trace)
{
    // Wrap a decoded StaticInst in a DynInst, register it with the CPU's
    // global instruction list, and append it to this thread's fetch queue
    // bound for decode.  Returns the newly built DynInst.

    // Get a sequence number.
    InstSeqNum seq = cpu->getAndIncrementInstSeq();

    // Create a new DynInst from the instruction fetched.
    DynInstPtr instruction =
        new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
    instruction->setTid(tid);

    // NOTE(review): the ASID is set from the tid — presumably one address
    // space per hardware thread; confirm against the rest of the model.
    instruction->setASID(tid);

    instruction->setThreadState(cpu->thread[tid]);

    DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "
            "[sn:%lli].\n", tid, thisPC.instAddr(),
            thisPC.microPC(), seq);

    DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,
            instruction->staticInst->
            disassemble(thisPC.instAddr()));

#if TRACING_ON
    if (trace) {
        instruction->traceData =
            cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
                    instruction->staticInst, thisPC, curMacroop);
    }
#else
    // Tracing compiled out: no per-instruction trace record is kept.
    instruction->traceData = NULL;
#endif

    // Add instruction to the CPU's list of instructions.
    instruction->setInstListIt(cpu->addInst(instruction));

    // Write the instruction to the first slot in the queue
    // that heads to decode.
    assert(numInst < fetchWidth);
    fetchQueue[tid].push_back(instruction);
    assert(fetchQueue[tid].size() <= fetchQueueSize);
    DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
            tid, fetchQueue[tid].size(), fetchQueueSize);
    //toDecode->insts[toDecode->size++] = instruction;

    // Keep track of if we can take an interrupt at this boundary
    delayedCommit[tid] = instruction->isDelayedCommit();

    return instruction;
}
1147 
template<class Impl>
void
DefaultFetch<Impl>::fetch(bool &status_change)
{
    // Does the actual fetching of instructions for one cycle: picks a
    // thread via the fetch policy, initiates I-cache/ITLB accesses when
    // the fetch buffer does not cover the fetch PC, then decodes as many
    // instructions/micro-ops as fetchWidth and the fetch queue allow.
    // Sets status_change when this thread's fetch status changes.

    //////////////////////////////////////////
    // Start actual fetch
    //////////////////////////////////////////
    ThreadID tid = getFetchingThread(fetchPolicy);

    assert(!cpu->switchedOut());

    if (tid == InvalidThreadID) {
        // Breaks looping condition in tick()
        threadFetched = numFetchingThreads;

        if (numThreads == 1) {  // @todo Per-thread stats
            profileStall(0);
        }

        return;
    }

    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);

    // The current PC.
    TheISA::PCState thisPC = pc[tid];

    Addr pcOffset = fetchOffset[tid];
    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;

    bool inRom = isRomMicroPC(thisPC.microPC());

    // If returning from the delay of a cache miss, then update the status
    // to running, otherwise do the cache access.  Possibly move this up
    // to tick() function.
    if (fetchStatus[tid] == IcacheAccessComplete) {
        DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);

        fetchStatus[tid] = Running;
        status_change = true;
    } else if (fetchStatus[tid] == Running) {
        // Align the fetch PC so its at the start of a fetch buffer segment.
        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);

        // If buffer is no longer valid or fetchAddr has moved to point
        // to the next cache block, AND we have no remaining ucode
        // from a macro-op, then start fetch from icache.
        if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])
            && !inRom && !macroop[tid]) {
            DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
                    "instruction, starting at PC %s.\n", tid, thisPC);

            fetchCacheLine(fetchAddr, tid, thisPC.instAddr());

            // fetchCacheLine updated fetchStatus; attribute the stall cycle
            // to the matching stat and bail out for this cycle.
            if (fetchStatus[tid] == IcacheWaitResponse)
                ++icacheStallCycles;
            else if (fetchStatus[tid] == ItlbWait)
                ++fetchTlbCycles;
            else
                ++fetchMiscStallCycles;
            return;
        } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
            // Stall CPU if an interrupt is posted and we're not issuing
            // an delayed commit micro-op currently (delayed commit instructions
            // are not interruptable by interrupts, only faults)
            ++fetchMiscStallCycles;
            DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid);
            return;
        }
    } else {
        if (fetchStatus[tid] == Idle) {
            ++fetchIdleCycles;
            DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
        }

        // Status is Idle, so fetch should do nothing.
        return;
    }

    ++fetchCycles;

    TheISA::PCState nextPC = thisPC;

    StaticInstPtr staticInst = NULL;
    StaticInstPtr curMacroop = macroop[tid];

    // If the read of the first instruction was successful, then grab the
    // instructions from the rest of the cache line and put them into the
    // queue heading to decode.

    DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
            "decode.\n", tid);

    // Need to keep track of whether or not a predicted branch
    // ended this fetch block.
    bool predictedBranch = false;

    // Need to halt fetch if quiesce instruction detected
    bool quiesce = false;

    TheISA::MachInst *cacheInsts =
        reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);

    const unsigned numInsts = fetchBufferSize / instSize;
    unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;

    // Loop through instruction memory from the cache.
    // Keep issuing while fetchWidth is available and branch is not
    // predicted taken
    while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
           && !predictedBranch && !quiesce) {
        // We need to process more memory if we aren't going to get a
        // StaticInst from the rom, the current macroop, or what's already
        // in the decoder.
        bool needMem = !inRom && !curMacroop &&
            !decoder[tid]->instReady();
        fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);

        if (needMem) {
            // If buffer is no longer valid or fetchAddr has moved to point
            // to the next cache block then start fetch from icache.
            if (!fetchBufferValid[tid] ||
                fetchBufferBlockPC != fetchBufferPC[tid])
                break;

            if (blkOffset >= numInsts) {
                // We need to process more memory, but we've run out of the
                // current block.
                break;
            }

            if (ISA_HAS_DELAY_SLOT && pcOffset == 0) {
                // Walk past any annulled delay slot instructions.
                Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask;
                while (fetchAddr != pcAddr && blkOffset < numInsts) {
                    blkOffset++;
                    fetchAddr += instSize;
                }
                if (blkOffset >= numInsts)
                    break;
            }

            // Feed the decoder one more machine word from the fetch buffer.
            MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
            decoder[tid]->moreBytes(thisPC, fetchAddr, inst);

            if (decoder[tid]->needMoreBytes()) {
                blkOffset++;
                fetchAddr += instSize;
                pcOffset += instSize;
            }
        }

        // Extract as many instructions and/or microops as we can from
        // the memory we've processed so far.
        do {
            if (!(curMacroop || inRom)) {
                if (decoder[tid]->instReady()) {
                    staticInst = decoder[tid]->decode(thisPC);

                    // Increment stat of fetched instructions.
                    ++fetchedInsts;

                    if (staticInst->isMacroop()) {
                        curMacroop = staticInst;
                    } else {
                        pcOffset = 0;
                    }
                } else {
                    // We need more bytes for this instruction so blkOffset and
                    // pcOffset will be updated
                    break;
                }
            }
            // Whether we're moving to a new macroop because we're at the
            // end of the current one, or the branch predictor incorrectly
            // thinks we are...
            bool newMacro = false;
            if (curMacroop || inRom) {
                if (inRom) {
                    staticInst = cpu->microcodeRom.fetchMicroop(
                            thisPC.microPC(), curMacroop);
                } else {
                    staticInst = curMacroop->fetchMicroop(thisPC.microPC());
                }
                newMacro |= staticInst->isLastMicroop();
            }

            DynInstPtr instruction =
                buildInst(tid, staticInst, curMacroop,
                          thisPC, nextPC, true);

            ppFetch->notify(instruction);
            numInst++;

#if TRACING_ON
            if (DTRACE(O3PipeView)) {
                instruction->fetchTick = curTick();
            }
#endif

            nextPC = thisPC;

            // If we're branching after this instruction, quit fetching
            // from the same block.
            predictedBranch |= thisPC.branching();
            predictedBranch |=
                lookupAndUpdateNextPC(instruction, nextPC);
            if (predictedBranch) {
                DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
            }

            newMacro |= thisPC.instAddr() != nextPC.instAddr();

            // Move to the next instruction, unless we have a branch.
            thisPC = nextPC;
            inRom = isRomMicroPC(thisPC.microPC());

            if (newMacro) {
                fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
                blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
                pcOffset = 0;
                curMacroop = NULL;
            }

            if (instruction->isQuiesce()) {
                DPRINTF(Fetch,
                        "Quiesce instruction encountered, halting fetch!\n");
                fetchStatus[tid] = QuiescePending;
                status_change = true;
                quiesce = true;
                break;
            }
        } while ((curMacroop || decoder[tid]->instReady()) &&
                 numInst < fetchWidth &&
                 fetchQueue[tid].size() < fetchQueueSize);

        // Re-evaluate whether the next instruction to fetch is in micro-op ROM
        // or not.
        inRom = isRomMicroPC(thisPC.microPC());
    }

    if (predictedBranch) {
        DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
                "instruction encountered.\n", tid);
    } else if (numInst >= fetchWidth) {
        DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
                "for this cycle.\n", tid);
    } else if (blkOffset >= fetchBufferSize) {
        // NOTE(review): blkOffset counts instruction slots while
        // fetchBufferSize is in bytes, so this condition looks like a units
        // mismatch (numInsts seems intended) — only affects this DPRINTF.
        DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the"
                "fetch buffer.\n", tid);
    }

    macroop[tid] = curMacroop;
    fetchOffset[tid] = pcOffset;

    if (numInst > 0) {
        wroteToTimeBuffer = true;
    }

    pc[tid] = thisPC;

    // pipeline a fetch if we're crossing a fetch buffer boundary and not in
    // a state that would preclude fetching
    fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
    Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
    issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
        fetchStatus[tid] != IcacheWaitResponse &&
        fetchStatus[tid] != ItlbWait &&
        fetchStatus[tid] != IcacheWaitRetry &&
        fetchStatus[tid] != QuiescePending &&
        !curMacroop;
}
1421 
template<class Impl>
void
DefaultFetch<Impl>::recvReqRetry()
{
    // Handles retrying the fetch access after the I-cache asked us to
    // retry: re-send the stashed packet if one is still pending, otherwise
    // the access was squashed in the meantime and we just unblock.
    if (retryPkt != NULL) {
        assert(cacheBlocked);
        assert(retryTid != InvalidThreadID);
        assert(fetchStatus[retryTid] == IcacheWaitRetry);

        if (cpu->getInstPort().sendTimingReq(retryPkt)) {
            fetchStatus[retryTid] = IcacheWaitResponse;
            // Notify Fetch Request probe when a retryPkt is successfully sent.
            // Note that notify must be called before retryPkt is set to NULL.
            ppFetchRequestSent->notify(retryPkt->req);
            retryPkt = NULL;
            retryTid = InvalidThreadID;
            cacheBlocked = false;
        }
        // If sendTimingReq failed again we keep retryPkt and stay blocked
        // until the next retry callback.
    } else {
        assert(retryTid == InvalidThreadID);
        // Access has been squashed since it was sent out.  Just clear
        // the cache being blocked.
        cacheBlocked = false;
    }
}
1447 
1449 // //
1450 // SMT FETCH POLICY MAINTAINED HERE //
1451 // //
1453 template<class Impl>
1454 ThreadID
1456 {
1457  if (numThreads > 1) {
1458  switch (fetch_priority) {
1459 
1460  case SingleThread:
1461  return 0;
1462 
1463  case RoundRobin:
1464  return roundRobin();
1465 
1466  case IQ:
1467  return iqCount();
1468 
1469  case LSQ:
1470  return lsqCount();
1471 
1472  case Branch:
1473  return branchCount();
1474 
1475  default:
1476  return InvalidThreadID;
1477  }
1478  } else {
1479  list<ThreadID>::iterator thread = activeThreads->begin();
1480  if (thread == activeThreads->end()) {
1481  return InvalidThreadID;
1482  }
1483 
1484  ThreadID tid = *thread;
1485 
1486  if (fetchStatus[tid] == Running ||
1487  fetchStatus[tid] == IcacheAccessComplete ||
1488  fetchStatus[tid] == Idle) {
1489  return tid;
1490  } else {
1491  return InvalidThreadID;
1492  }
1493  }
1494 }
1495 
1496 
1497 template<class Impl>
1498 ThreadID
1500 {
1501  list<ThreadID>::iterator pri_iter = priorityList.begin();
1502  list<ThreadID>::iterator end = priorityList.end();
1503 
1504  ThreadID high_pri;
1505 
1506  while (pri_iter != end) {
1507  high_pri = *pri_iter;
1508 
1509  assert(high_pri <= numThreads);
1510 
1511  if (fetchStatus[high_pri] == Running ||
1512  fetchStatus[high_pri] == IcacheAccessComplete ||
1513  fetchStatus[high_pri] == Idle) {
1514 
1515  priorityList.erase(pri_iter);
1516  priorityList.push_back(high_pri);
1517 
1518  return high_pri;
1519  }
1520 
1521  pri_iter++;
1522  }
1523 
1524  return InvalidThreadID;
1525 }
1526 
1527 template<class Impl>
1528 ThreadID
1530 {
1531  //sorted from lowest->highest
1532  std::priority_queue<unsigned,vector<unsigned>,
1533  std::greater<unsigned> > PQ;
1534  std::map<unsigned, ThreadID> threadMap;
1535 
1536  list<ThreadID>::iterator threads = activeThreads->begin();
1537  list<ThreadID>::iterator end = activeThreads->end();
1538 
1539  while (threads != end) {
1540  ThreadID tid = *threads++;
1541  unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1542 
1543  //we can potentially get tid collisions if two threads
1544  //have the same iqCount, but this should be rare.
1545  PQ.push(iqCount);
1546  threadMap[iqCount] = tid;
1547  }
1548 
1549  while (!PQ.empty()) {
1550  ThreadID high_pri = threadMap[PQ.top()];
1551 
1552  if (fetchStatus[high_pri] == Running ||
1553  fetchStatus[high_pri] == IcacheAccessComplete ||
1554  fetchStatus[high_pri] == Idle)
1555  return high_pri;
1556  else
1557  PQ.pop();
1558 
1559  }
1560 
1561  return InvalidThreadID;
1562 }
1563 
1564 template<class Impl>
1565 ThreadID
1567 {
1568  //sorted from lowest->highest
1569  std::priority_queue<unsigned,vector<unsigned>,
1570  std::greater<unsigned> > PQ;
1571  std::map<unsigned, ThreadID> threadMap;
1572 
1573  list<ThreadID>::iterator threads = activeThreads->begin();
1574  list<ThreadID>::iterator end = activeThreads->end();
1575 
1576  while (threads != end) {
1577  ThreadID tid = *threads++;
1578  unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1579 
1580  //we can potentially get tid collisions if two threads
1581  //have the same iqCount, but this should be rare.
1582  PQ.push(ldstqCount);
1583  threadMap[ldstqCount] = tid;
1584  }
1585 
1586  while (!PQ.empty()) {
1587  ThreadID high_pri = threadMap[PQ.top()];
1588 
1589  if (fetchStatus[high_pri] == Running ||
1590  fetchStatus[high_pri] == IcacheAccessComplete ||
1591  fetchStatus[high_pri] == Idle)
1592  return high_pri;
1593  else
1594  PQ.pop();
1595  }
1596 
1597  return InvalidThreadID;
1598 }
1599 
template<class Impl>
void
1613 
1614 template<class Impl>
1615 void
1617 {
1618  if (!issuePipelinedIfetch[tid]) {
1619  return;
1620  }
1621 
1622  // The next PC to access.
1623  TheISA::PCState thisPC = pc[tid];
1624 
1625  if (isRomMicroPC(thisPC.microPC())) {
1626  return;
1627  }
1628 
1629  Addr pcOffset = fetchOffset[tid];
1630  Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1631 
1632  // Align the fetch PC so its at the start of a fetch buffer segment.
1633  Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1634 
1635  // Unless buffer already got the block, fetch it from icache.
1636  if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1637  DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "
1638  "starting at PC %s.\n", tid, thisPC);
1639 
1640  fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1641  }
1642 }
1643 
template<class Impl>
void
DefaultFetch<Impl>::profileStall(ThreadID tid) {
    // Attribute this cycle's fetch stall to the matching stat counter
    // based on the thread's current fetch status, for profiling why
    // fetch made no progress.
    DPRINTF(Fetch,"There are no more threads available to fetch from.\n");

    // @todo Per-thread stats

    if (stalls[tid].drain) {
        ++fetchPendingDrainCycles;
        DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
    } else if (activeThreads->empty()) {
        ++fetchNoActiveThreadStallCycles;
        DPRINTF(Fetch, "Fetch has no active thread!\n");
    } else if (fetchStatus[tid] == Blocked) {
        ++fetchBlockedCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
    } else if (fetchStatus[tid] == Squashing) {
        ++fetchSquashCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
    } else if (fetchStatus[tid] == IcacheWaitResponse) {
        ++icacheStallCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",
                tid);
    } else if (fetchStatus[tid] == ItlbWait) {
        ++fetchTlbCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "
                "finish!\n", tid);
    } else if (fetchStatus[tid] == TrapPending) {
        ++fetchPendingTrapStallCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",
                tid);
    } else if (fetchStatus[tid] == QuiescePending) {
        ++fetchPendingQuiesceStallCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "
                "instruction!\n", tid);
    } else if (fetchStatus[tid] == IcacheWaitRetry) {
        ++fetchIcacheWaitRetryStallCycles;
        DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",
                tid);
    } else if (fetchStatus[tid] == NoGoodAddr) {
        // No stat counter for this case; it only shows up in the trace.
        DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",
                tid);
    } else {
        DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",
                tid, fetchStatus[tid]);
    }
}
1691 
1692 #endif//__CPU_O3_FETCH_IMPL_HH__
Impl::O3CPU O3CPU
Definition: fetch.hh:78
#define DPRINTF(x,...)
Definition: trace.hh:212
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition: info.hh:51
Impl::DynInst DynInst
Definition: fetch.hh:76
unsigned fetchWidth
The width of fetch in instructions.
Definition: fetch.hh:457
ThreadID iqCount()
Returns the appropriate thread to fetch using the IQ count policy.
Definition: fetch_impl.hh:1529
void profileStall(ThreadID tid)
Profile the reasons of fetch stall.
Definition: fetch_impl.hh:1646
TheISA::Decoder * decoder[Impl::MaxThreads]
The decoder.
Definition: fetch.hh:360
decltype(nullptr) constexpr NoFault
Definition: types.hh:189
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
const std::string & name()
Definition: trace.cc:49
void tick()
Ticks the fetch stage, processing all inputs signals and fetching as many instructions as possible...
Definition: fetch_impl.hh:889
FetchPriority fetchPolicy
Fetch policy.
Definition: fetch.hh:193
void resetStage()
Reset this pipeline stage.
Definition: fetch_impl.hh:344
Bitfield< 7 > i
Definition: miscregs.hh:1378
void squashFromDecode(const TheISA::PCState &newPC, const DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition: fetch_impl.hh:805
ContextID contextId() const
Accessor function for context ID.
Definition: request.hh:694
FetchPriority
Fetching Policy, Add new policies here.
Definition: fetch.hh:177
#define panic(...)
Definition: misc.hh:153
void pipelineIcacheAccesses(ThreadID tid)
Pipeline the next I-cache access to the current one.
Definition: fetch_impl.hh:1616
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition: fetch_impl.hh:435
Addr fetchBufferPC[Impl::MaxThreads]
The PC of the first instruction loaded into the fetch buffer.
Definition: fetch.hh:486
Impl::DynInstPtr DynInstPtr
Definition: fetch.hh:77
bool isMacroop() const
Definition: static_inst.hh:168
bool lookupAndUpdateNextPC(DynInstPtr &inst, TheISA::PCState &pc)
Looks up in the branch predictor to see if the next PC should be either next PC+=MachInst or a branch...
Definition: fetch_impl.hh:550
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:146
void processCacheCompletion(PacketPtr pkt)
Processes cache completion event.
Definition: fetch_impl.hh:379
virtual StaticInstPtr fetchMicroop(MicroPC upc) const
Return the microop that goes with a particular micropc.
Definition: static_inst.cc:66
Definition: lsq.hh:58
void setAccessLatency()
Set/Get the time taken to complete this request's access, not including the time to successfully tran...
Definition: request.hh:738
ThreadID numThreads
Number of threads.
Definition: fetch.hh:507
T gtoh(T value)
Definition: byteswap.hh:179
std::enable_if< std::is_integral< T >::value, T >::type random()
Use the SFINAE idiom to choose an implementation based on whether the type is integral or floating po...
Definition: random.hh:83
uint32_t MachInst
Definition: types.hh:40
#define ISA_HAS_DELAY_SLOT
Definition: isa_traits.hh:49
FetchStatus
Overall fetch status.
Definition: fetch.hh:155
void deactivateThread(ThreadID tid)
For priority-based fetch policies, need to keep update priorityList.
Definition: fetch_impl.hh:539
#define warn(...)
Definition: misc.hh:219
TheISA::MachInst MachInst
Typedefs from ISA.
Definition: fetch.hh:85
void takeOverFrom()
Takes over from another CPU's thread.
Definition: fetch_impl.hh:484
void drainStall(ThreadID tid)
Stall the fetch stage after reaching a safe drain point.
Definition: fetch_impl.hh:493
ThreadID getFetchingThread(FetchPriority &fetch_priority)
Returns the appropriate thread to fetch, given the fetch policy.
Definition: fetch_impl.hh:1455
Tick curTick()
The current simulated tick.
Definition: core.hh:47
void quiesce(ThreadContext *tc)
Definition: pseudo_inst.cc:240
void regStats()
Registers statistics.
Definition: fetch_impl.hh:181
#define DTRACE(x)
Definition: trace.hh:210
bool isDrained() const
Has the stage drained?
Definition: fetch_impl.hh:453
ThreadID roundRobin()
Returns the appropriate thread to fetch using a round robin policy.
Definition: fetch_impl.hh:1499
const ExtMachInst NoopMachInst
Definition: isa_traits.hh:116
void switchToActive()
Changes the status of this stage to active, and indicates this to the CPU.
Definition: fetch_impl.hh:513
int instSize
Size of instructions.
Definition: fetch.hh:498
bool checkStall(ThreadID tid) const
Checks if a thread is stalled.
Definition: fetch_impl.hh:820
void fetch(bool &status_change)
Does the actual fetching of instructions and passing them on to the next stage.
Definition: fetch_impl.hh:1150
void finishTranslation(const Fault &fault, RequestPtr mem_req)
Definition: fetch_impl.hh:643
Addr getPaddr() const
Definition: request.hh:519
#define fatal(...)
Definition: misc.hh:163
const RequestPtr req
A pointer to the original request.
Definition: packet.hh:304
bool fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
Fetches the cache line that contains the fetch PC.
Definition: fetch_impl.hh:593
void recvReqRetry()
Handles retrying the fetch access.
Definition: fetch_impl.hh:1424
DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
DefaultFetch constructor.
Definition: fetch_impl.hh:81
uint64_t InstSeqNum
Definition: inst_seq.hh:40
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets pointer to list of active threads.
Definition: fetch_impl.hh:317
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
void regProbePoints()
Registers probes.
Definition: fetch_impl.hh:171
void advancePC(PCState &pc, const StaticInstPtr &inst)
Definition: utility.hh:108
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:245
uint8_t * fetchBuffer[Impl::MaxThreads]
The fetch data that is being fetched and buffered.
Definition: fetch.hh:483
static bool isRomMicroPC(MicroPC upc)
Definition: types.hh:161
ThreadID branchCount()
Returns the appropriate thread to fetch using the branch count policy.
Definition: fetch_impl.hh:1602
const FlagsType total
Print the total.
Definition: info.hh:49
const ThreadID InvalidThreadID
Definition: types.hh:172
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:171
void setTimeBuffer(TimeBuffer< TimeStruct > *time_buffer)
Sets the main backwards communication time buffer pointer.
Definition: fetch_impl.hh:304
void setFetchQueue(TimeBuffer< FetchStruct > *fq_ptr)
Sets pointer to time buffer used to communicate to the next stage.
Definition: fetch_impl.hh:324
int size()
Definition: pagetable.hh:146
Declaration of the Packet class.
GenericISA::SimplePCState< MachInst > PCState
Definition: types.hh:43
The request was an instruction fetch.
Definition: request.hh:104
FetchStatus updateFetchStatus()
Updates overall fetch stage status; to be called at the end of each cycle.
Definition: fetch_impl.hh:835
Addr getVaddr() const
Definition: request.hh:616
void switchToInactive()
Changes the status of this stage to inactive, and indicates this to the CPU.
Definition: fetch_impl.hh:526
void startupStage()
Initialize stage.
Definition: fetch_impl.hh:332
Random random_mt
Definition: random.cc:100
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:947
wire getWire(int idx)
Definition: timebuf.hh:232
IntReg pc
Definition: remote_gdb.hh:91
void squash(const TheISA::PCState &newPC, const InstSeqNum seq_num, DynInstPtr squashInst, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition: fetch_impl.hh:875
unsigned fetchBufferSize
The size of the fetch buffer in bytes.
Definition: fetch.hh:477
ThreadID lsqCount()
Returns the appropriate thread to fetch using the LSQ count policy.
Definition: fetch_impl.hh:1566
void wakeFromQuiesce()
Tells fetch to wake up from a quiesce instruction.
Definition: fetch_impl.hh:503
const T * getConstPtr() const
Definition: packet.hh:967
void drainResume()
Resume after a drain.
Definition: fetch_impl.hh:425
bool checkSignalsAndUpdate(ThreadID tid)
Checks all input signals and updates the status as necessary.
Definition: fetch_impl.hh:991
std::string name() const
Returns the name of fetch.
Definition: fetch_impl.hh:164
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst, StaticInstPtr curMacroop, TheISA::PCState thisPC, TheISA::PCState nextPC, bool trace)
Definition: fetch_impl.hh:1096
bool isLastMicroop() const
Definition: static_inst.hh:171
std::shared_ptr< FaultBase > Fault
Definition: types.hh:184
BPredUnit * branchPred
BPredUnit.
Definition: fetch.hh:413
uint32_t taskId() const
Definition: request.hh:630
void doSquash(const TheISA::PCState &newPC, const DynInstPtr squashInst, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition: fetch_impl.hh:752
unsigned int cacheBlkSize
Cache block size.
Definition: fetch.hh:472
bool fetchBufferValid[Impl::MaxThreads]
Whether or not the fetch buffer data is valid.
Definition: fetch.hh:495
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:102

Generated on Fri Jun 9 2017 13:03:43 for gem5 by doxygen 1.8.6