gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
trace_cpu.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013 - 2016 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  *
37  * Authors: Radhika Jagtap
38  * Andreas Hansson
39  * Thomas Grass
40  */
41 
42 #include "cpu/trace/trace_cpu.hh"
43 
44 #include "sim/sim_exit.hh"
45 
46 // Declare and initialize the static counter for number of trace CPUs.
48 
49 TraceCPU::TraceCPU(TraceCPUParams *params)
50  : BaseCPU(params),
51  icachePort(this),
52  dcachePort(this),
53  instMasterID(params->system->getMasterId(name() + ".inst")),
54  dataMasterID(params->system->getMasterId(name() + ".data")),
55  instTraceFile(params->instTraceFile),
56  dataTraceFile(params->dataTraceFile),
57  icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
58  dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
59  params),
60  icacheNextEvent(this),
61  dcacheNextEvent(this),
62  oneTraceComplete(false),
63  traceOffset(0),
64  execCompleteEvent(nullptr),
65  enableEarlyExit(params->enableEarlyExit),
66  progressMsgInterval(params->progressMsgInterval),
67  progressMsgThreshold(params->progressMsgInterval)
68 {
69  // Increment static counter for number of Trace CPUs.
71 
72  // Check that the python parameters for sizes of ROB, store buffer and
73  // load buffer do not overflow the corresponding C++ variables.
74  fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
75  "max. value of %d.\n", params->sizeROB, UINT16_MAX);
76  fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d "
77  "exceeds the max. value of %d.\n", params->sizeROB,
78  UINT16_MAX);
79  fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
80  " %d exceeds the max. value of %d.\n",
81  params->sizeLoadBuffer, UINT16_MAX);
82 }
83 
// NOTE(review): the defining signature line (internal 84) was dropped by
// the page extraction; from its position between the constructor and
// TraceCPUParams::create() this is presumably the empty destructor
// TraceCPU::~TraceCPU() -- confirm against the upstream file.
85 {
86 
87 }
88 
89 TraceCPU*
90 TraceCPUParams::create()
91 {
92  return new TraceCPU(this);
93 }
94 
95 void
// Record the number of committed micro-ops (taken from the node's ROB
// sequence number) into the numOps stat and emit a periodic progress
// message.
// NOTE(review): interior lines were dropped by the extraction (internal
// numbering jumps 98 -> 100 -> 102); the missing lines presumably guard
// the inform() with a progress-interval check and advance
// progressMsgThreshold by progressMsgInterval -- confirm upstream.
96 TraceCPU::updateNumOps(uint64_t rob_num)
97 {
98  numOps = rob_num;
100  inform("%s: %i insts committed\n", name(), progressMsgThreshold);
102  }
103 }
104 
105 void
// Take over simulation state from another CPU model during a switch-over:
// move the instruction- and data-side port bindings from the old CPU onto
// this TraceCPU.
// NOTE(review): the signature line (internal 106) was dropped by the
// extraction; presumably TraceCPU::takeOverFrom(BaseCPU *oldCPU) given
// the oldCPU parameter used below -- confirm upstream.
107 {
108  // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
109  assert(!getInstPort().isConnected());
110  assert(oldCPU->getInstPort().isConnected());
111  BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
112  oldCPU->getInstPort().unbind();
113  getInstPort().bind(inst_peer_port);
114 
115  assert(!getDataPort().isConnected());
116  assert(oldCPU->getDataPort().isConnected());
117  BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
118  oldCPU->getDataPort().unbind();
119  getDataPort().bind(data_peer_port);
120 }
121 
122 void
// Initialise the Trace CPU: initialise both trace generators, compute the
// common time offset of the two traces, and schedule the first icache and
// dcache events relative to that offset.
// NOTE(review): the signature line (internal 123) was dropped by the
// extraction; presumably TraceCPU::init() -- confirm upstream.
124 {
125  DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
126  "\n", instTraceFile);
127  DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
128  dataTraceFile);
129 
130  BaseCPU::init();
131 
132  // Get the send tick of the first instruction read request
133  Tick first_icache_tick = icacheGen.init();
134 
135  // Get the send tick of the first data read/write request
136  Tick first_dcache_tick = dcacheGen.init();
137 
138  // Set the trace offset as the minimum of that in both traces
139  traceOffset = std::min(first_icache_tick, first_dcache_tick);
140  inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
141  name(), traceOffset);
142 
143  // Schedule next icache and dcache event by subtracting the offset
144  schedule(icacheNextEvent, first_icache_tick - traceOffset);
145  schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
146 
147  // Adjust the trace offset for the dcache generator's ready nodes
148  // We don't need to do this for the icache generator as it will
149  // send its first request at the first event and schedule subsequent
150  // events using a relative tick delta
// NOTE(review): the statement described by the comment above (internal
// line 151) was dropped by the extraction; presumably a call to
// dcacheGen.adjustInitTraceOffset(traceOffset) -- confirm upstream.
152 
153  // If the Trace CPU simulation is configured to exit on any one trace
154  // completion then we don't need a counted event to count down all Trace
155  // CPUs in the system. If not then instantiate a counted event.
156  if (!enableEarlyExit) {
157  // The static counter for number of Trace CPUs is correctly set at
158  // this point so create an event and pass it.
159  execCompleteEvent = new CountedExitEvent("end of all traces reached.",
160  numTraceCPUs);
161  }
162 
163 }
164 
165 void
// Event handler for the instruction-side generator: try to send the next
// (or retried) fetch request and, on success, schedule the following
// event using the generator's relative tick delta.
// NOTE(review): the signature line (internal 166) was dropped by the
// extraction; presumably TraceCPU::schedIcacheNext() -- confirm upstream.
167 {
168  DPRINTF(TraceCPUInst, "IcacheGen event.\n");
169 
170  // Try to send the current packet or a retry packet if there is one
171  bool sched_next = icacheGen.tryNext();
172  // If packet sent successfully, schedule next event
173  if (sched_next) {
174  DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
175  "at %d.\n", curTick() + icacheGen.tickDelta());
176  schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
// NOTE(review): internal line 177 dropped here; presumably a stat
// increment (++numSchedIcacheEvent, registered in regStats) -- confirm.
178  } else {
179  // check if traceComplete. If not, do nothing because sending failed
180  // and next event will be scheduled via RecvRetry()
181  if (icacheGen.isTraceComplete()) {
182  // If this is the first trace to complete, set the variable. If it
183  // is already set then both traces are complete to exit sim.
// NOTE(review): internal line 184 dropped here; presumably a call to
// checkAndSchedExitEvent(), matching the comment above -- confirm.
185  }
186  }
187  return;
188 }
189 
190 void
// Event handler for the data-side generator: update the numCycles stat
// and let the elastic data generator execute dependency-free nodes.
// NOTE(review): the signature line (internal 191) was dropped by the
// extraction; presumably TraceCPU::schedDcacheNext() -- confirm upstream.
192 {
193  DPRINTF(TraceCPUData, "DcacheGen event.\n");
194 
195  // Update stat for numCycles
196  numCycles = clockEdge() / clockPeriod();
197 
198  dcacheGen.execute();
199  if (dcacheGen.isExecComplete()) {
// NOTE(review): internal line 200 dropped here; presumably a call to
// checkAndSchedExitEvent() on data-trace completion -- confirm upstream.
201  }
202 }
203 
204 void
// Called when one of the two traces finishes replaying. The first call
// only latches oneTraceComplete; the second call (both traces done)
// either exits the simulation immediately (early-exit mode) or schedules
// the counted exit event shared by all Trace CPUs.
// NOTE(review): the signature line (internal 205) was dropped by the
// extraction; presumably TraceCPU::checkAndSchedExitEvent() -- confirm.
206 {
207  if (!oneTraceComplete) {
208  oneTraceComplete = true;
209  } else {
210  // Schedule event to indicate execution is complete as both
211  // instruction and data access traces have been played back.
212  inform("%s: Execution complete.\n", name());
213  // If the replay is configured to exit early, that is when any one
214  // execution is complete then exit immediately and return. Otherwise,
215  // schedule the counted exit that counts down completion of each Trace
216  // CPU.
217  if (enableEarlyExit) {
218  exitSimLoop("End of trace reached");
219  } else {
220  schedule(*execCompleteEvent, curTick());
221  }
222  }
223 }
224 
225 void
// Register the Trace CPU's statistics (event counts, micro-op count and a
// CPI proxy formula) on top of the BaseCPU stats.
// NOTE(review): several lines were dropped by the extraction: the
// signature (internal 226, presumably TraceCPU::regStats()), the stat
// variable names before the first two .name() chains (internal 231/236,
// presumably numSchedDcacheEvent / numSchedIcacheEvent per the .name()
// strings), and internal 253-254 (presumably delegating regStats to the
// icache/dcache generators) -- confirm against the upstream file.
227 {
228 
229  BaseCPU::regStats();
230 
232  .name(name() + ".numSchedDcacheEvent")
233  .desc("Number of events scheduled to trigger data request generator")
234  ;
235 
237  .name(name() + ".numSchedIcacheEvent")
238  .desc("Number of events scheduled to trigger instruction request generator")
239  ;
240 
241  numOps
242  .name(name() + ".numOps")
243  .desc("Number of micro-ops simulated by the Trace CPU")
244  ;
245 
246  cpi
247  .name(name() + ".cpi")
248  .desc("Cycles per micro-op used as a proxy for CPI")
249  .precision(6)
250  ;
251  cpi = numCycles/numOps;
252 
255 }
256 
257 void
// Register the elastic data generator's statistics.
// NOTE(review): the extraction dropped the signature (internal 258,
// presumably TraceCPU::ElasticDataGen::regStats()) and the stat variable
// name preceding most .name() chains below; the identities of the
// missing variables (maxDependents, maxReadyListSize, numSendAttempted,
// numSendSucceeded, numSendFailed, numRetrySucceeded, numSplitReqs,
// numSOStores, dataLastTick) can be read off the .name() strings --
// confirm against the upstream file.
259 {
260  using namespace Stats;
261 
263  .name(name() + ".maxDependents")
264  .desc("Max number of dependents observed on a node")
265  ;
266 
268  .name(name() + ".maxReadyListSize")
269  .desc("Max size of the ready list observed")
270  ;
271 
273  .name(name() + ".numSendAttempted")
274  .desc("Number of first attempts to send a request")
275  ;
276 
278  .name(name() + ".numSendSucceeded")
279  .desc("Number of successful first attempts")
280  ;
281 
283  .name(name() + ".numSendFailed")
284  .desc("Number of failed first attempts")
285  ;
286 
288  .name(name() + ".numRetrySucceeded")
289  .desc("Number of successful retries")
290  ;
291 
293  .name(name() + ".numSplitReqs")
294  .desc("Number of split requests")
295  ;
296 
297  numSOLoads
298  .name(name() + ".numSOLoads")
299  .desc("Number of strictly ordered loads")
300  ;
301 
303  .name(name() + ".numSOStores")
304  .desc("Number of strictly ordered stores")
305  ;
306 
308  .name(name() + ".dataLastTick")
309  .desc("Last tick simulated from the elastic data trace")
310  ;
311 }
312 
313 Tick
// Initialise the elastic data generator: read two windows of the
// dependency trace into depGraph and return the execute tick of the
// earliest dependency-free node so the owner can schedule the first
// dcache event.
// NOTE(review): the signature line (internal 314) was dropped by the
// extraction; presumably TraceCPU::ElasticDataGen::init() -- confirm.
315 {
316  DPRINTF(TraceCPUData, "Initializing data memory request generator "
317  "DcacheGen: elastic issue with retry.\n");
318 
319  if (!readNextWindow())
320  panic("Trace has %d elements. It must have at least %d elements.\n",
321  depGraph.size(), 2 * windowSize);
322  DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
323  depGraph.size());
324 
325  if (!readNextWindow())
326  panic("Trace has %d elements. It must have at least %d elements.\n",
327  depGraph.size(), 2 * windowSize);
328  DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
329  depGraph.size());
330 
331  // Print readyList
332  if (DTRACE(TraceCPUData)) {
333  printReadyList();
334  }
335  auto free_itr = readyList.begin();
336  DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
337  " is %d.\n", free_itr->seqNum, free_itr->execTick);
338  // Return the execute tick of the earliest ready node so that an event
339  // can be scheduled to call execute()
340  return (free_itr->execTick);
341 }
342 
343 void
// Shift the execute ticks of all already-ready nodes back by the common
// trace offset computed in TraceCPU::init().
// NOTE(review): the signature line (internal 344) was dropped by the
// extraction; presumably
// TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) { given
// the offset parameter used below -- confirm upstream.
345  for (auto& free_node : readyList) {
346  free_node.execTick -= offset;
347  }
348 }
349 
350 void
// Release the trace-reader resources when the generator is done.
// NOTE(review): the signature line (internal 351) was dropped by the
// extraction; presumably TraceCPU::ElasticDataGen::exit() -- confirm.
352 {
353  trace.reset();
354 }
355 
356 bool
// Read the next window of records from the elastic trace into depGraph,
// wiring up ROB and register dependencies onto parent nodes and issuing
// any node that is already dependency-free. Returns false when the end of
// the trace is reached (and sets traceComplete), true otherwise.
// NOTE(review): the signature line (internal 357) was dropped by the
// extraction; presumably TraceCPU::ElasticDataGen::readNextWindow() --
// confirm upstream.
358 {
359 
360  // Read and add next window
361  DPRINTF(TraceCPUData, "Reading next window from file.\n");
362 
363  if (traceComplete) {
364  // We are at the end of the file, thus we have no more records.
365  // Return false.
366  return false;
367  }
368 
369  DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
370  depGraph.size());
371 
372  uint32_t num_read = 0;
373  while (num_read != windowSize) {
374 
375  // Create a new graph node
376  GraphNode* new_node = new GraphNode;
377 
378  // Read the next line to get the next record. If that fails then end of
379  // trace has been reached and traceComplete needs to be set in addition
380  // to returning false.
381  if (!trace.read(new_node)) {
382  DPRINTF(TraceCPUData, "\tTrace complete!\n");
383  traceComplete = true;
384  return false;
385  }
386 
387  // Annotate the ROB dependencies of the new node onto the parent nodes.
388  addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
389  // Annotate the register dependencies of the new node onto the parent
390  // nodes.
391  addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
392 
393  num_read++;
394  // Add to map
395  depGraph[new_node->seqNum] = new_node;
396  if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
397  // Source dependencies are already complete, check if resources
398  // are available and issue. The execution time is approximated
399  // to current time plus the computational delay.
400  checkAndIssue(new_node);
401  }
402  }
403 
404  DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
405  depGraph.size());
406  return true;
407 }
408 
409 template<typename T> void
// Walk a node's dependency array (ROB or register deps) and register the
// new node as a dependent on each parent still present in depGraph; a
// dependency whose parent has already completed (not found in the graph)
// is cleared and the pending-dependency count is decremented.
// NOTE(review): the first signature line (internal 410) was dropped by
// the extraction; presumably
// TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, --
// confirm upstream.
411  T& dep_array, uint8_t& num_dep)
412 {
413  for (auto& a_dep : dep_array) {
414  // The convention is to set the dependencies starting with the first
415  // index in the ROB and register dependency arrays. Thus, when we reach
416  // a dependency equal to the initialisation value of zero, we know have
417  // iterated over all dependencies and can break.
418  if (a_dep == 0)
419  break;
420  // We look up the valid dependency, i.e. the parent of this node
421  auto parent_itr = depGraph.find(a_dep);
422  if (parent_itr != depGraph.end()) {
423  // If the parent is found, it is yet to be executed. Append a
424  // pointer to the new node to the dependents list of the parent
425  // node.
426  parent_itr->second->dependents.push_back(new_node);
427  auto num_depts = parent_itr->second->dependents.size();
428  maxDependents = std::max<double>(num_depts, maxDependents.value());
429  } else {
430  // The dependency is not found in the graph. So consider
431  // the execution of the parent is complete, i.e. remove this
432  // dependency.
433  a_dep = 0;
434  num_dep--;
435  }
436  }
437 }
438 
439 void
// Main replay loop of the elastic data generator: retry pending
// dependency-free nodes, execute every ready node whose execute tick has
// arrived (sending memory requests for loads/stores), wake dependents,
// refill the dependency graph when it drains below the window size, and
// schedule the next event or mark execution complete.
// NOTE(review): the extraction dropped several interior lines here
// (internal numbering gaps at 440 [the signature, presumably
// TraceCPU::ElasticDataGen::execute()], 446, 488, 591, 597, 617 and 626,
// which appear to hold a printed occupancy call, a retry-success stat
// increment, the retry packet's sequence-number DPRINTF argument, and the
// trailing conditions of two if-statements). Confirm against the
// upstream file before relying on the exact control flow.
441 {
442  DPRINTF(TraceCPUData, "Execute start occupancy:\n");
443  DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
444  "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
445  depFreeQueue.size());
447 
448  // Read next window to make sure that dependents of all dep-free nodes
449  // are in the depGraph
450  if (nextRead) {
451  readNextWindow();
452  nextRead = false;
453  }
454 
455  // First attempt to issue the pending dependency-free nodes held
456  // in depFreeQueue. If resources have become available for a node,
457  // then issue it, i.e. add the node to readyList.
458  while (!depFreeQueue.empty()) {
459  if (checkAndIssue(depFreeQueue.front(), false)) {
460  DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
461  "%lli.\n", (depFreeQueue.front())->seqNum);
462  depFreeQueue.pop();
463  } else {
464  break;
465  }
466  }
467  // Proceed to execute from readyList
468  auto graph_itr = depGraph.begin();
469  auto free_itr = readyList.begin();
470  // Iterate through readyList until the next free node has its execute
471  // tick later than curTick or the end of readyList is reached
472  while (free_itr->execTick <= curTick() && free_itr != readyList.end()) {
473 
474  // Get pointer to the node to be executed
475  graph_itr = depGraph.find(free_itr->seqNum);
476  assert(graph_itr != depGraph.end());
477  GraphNode* node_ptr = graph_itr->second;
478 
479  // If there is a retryPkt send that else execute the load
480  if (retryPkt) {
481  // The retryPkt must be the request that was created by the
482  // first node in the readyList.
483  if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
484  panic("Retry packet's seqence number does not match "
485  "the first node in the readyList.\n");
486  }
487  if (port.sendTimingReq(retryPkt)) {
489  retryPkt = nullptr;
490  }
491  } else if (node_ptr->isLoad() || node_ptr->isStore()) {
492  // If there is no retryPkt, attempt to send a memory request in
493  // case of a load or store node. If the send fails, executeMemReq()
494  // returns a packet pointer, which we save in retryPkt. In case of
495  // a comp node we don't do anything and simply continue as if the
496  // execution of the comp node succedded.
497  retryPkt = executeMemReq(node_ptr);
498  }
499  // If the retryPkt or a new load/store node failed, we exit from here
500  // as a retry from cache will bring the control to execute(). The
501  // first node in readyList then, will be the failed node.
502  if (retryPkt) {
503  break;
504  }
505 
506  // Proceed to remove dependencies for the successfully executed node.
507  // If it is a load which is not strictly ordered and we sent a
508  // request for it successfully, we do not yet mark any register
509  // dependencies complete. But as per dependency modelling we need
510  // to mark ROB dependencies of load and non load/store nodes which
511  // are based on successful sending of the load as complete.
512  if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
513  // If execute succeeded mark its dependents as complete
514  DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
515  "dependents..\n", node_ptr->seqNum);
516 
517  auto child_itr = (node_ptr->dependents).begin();
518  while (child_itr != (node_ptr->dependents).end()) {
519  // ROB dependency of a store on a load must not be removed
520  // after load is sent but after response is received
521  if (!(*child_itr)->isStore() &&
522  (*child_itr)->removeRobDep(node_ptr->seqNum)) {
523 
524  // Check if the child node has become dependency free
525  if ((*child_itr)->numRobDep == 0 &&
526  (*child_itr)->numRegDep == 0) {
527 
528  // Source dependencies are complete, check if
529  // resources are available and issue
530  checkAndIssue(*child_itr);
531  }
532  // Remove this child for the sent load and point to new
533  // location of the element following the erased element
534  child_itr = node_ptr->dependents.erase(child_itr);
535  } else {
536  // This child is not dependency-free, point to the next
537  // child
538  child_itr++;
539  }
540  }
541  } else {
542  // If it is a strictly ordered load mark its dependents as complete
543  // as we do not send a request for this case. If it is a store or a
544  // comp node we also mark all its dependents complete.
545  DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
546  " up dependents..\n", node_ptr->seqNum);
547 
548  for (auto child : node_ptr->dependents) {
549  // If the child node is dependency free removeDepOnInst()
550  // returns true.
551  if (child->removeDepOnInst(node_ptr->seqNum)) {
552  // Source dependencies are complete, check if resources
553  // are available and issue
554  checkAndIssue(child);
555  }
556  }
557  }
558 
559  // After executing the node, remove from readyList and delete node.
560  readyList.erase(free_itr);
561  // If it is a cacheable load which was sent, don't delete
562  // just yet. Delete it in completeMemAccess() after the
563  // response is received. If it is an strictly ordered
564  // load, it was not sent and all dependencies were simply
565  // marked complete. Thus it is safe to delete it. For
566  // stores and non load/store nodes all dependencies were
567  // marked complete so it is safe to delete it.
568  if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
569  // Release all resources occupied by the completed node
570  hwResource.release(node_ptr);
571  // clear the dynamically allocated set of dependents
572  (node_ptr->dependents).clear();
573  // Update the stat for numOps simulated
574  owner.updateNumOps(node_ptr->robNum);
575  // delete node
576  delete node_ptr;
577  // remove from graph
578  depGraph.erase(graph_itr);
579  }
580  // Point to first node to continue to next iteration of while loop
581  free_itr = readyList.begin();
582  } // end of while loop
583 
584  // Print readyList, sizes of queues and resource status after updating
585  if (DTRACE(TraceCPUData)) {
586  printReadyList();
587  DPRINTF(TraceCPUData, "Execute end occupancy:\n");
588  DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
589  "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
590  depFreeQueue.size());
592  }
593 
594  if (retryPkt) {
595  DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
596  "event from the cache for seq. num %lli.\n",
598  return;
599  }
600  // If the size of the dependency graph is less than the dependency window
601  // then read from the trace file to populate the graph next time we are in
602  // execute.
603  if (depGraph.size() < windowSize && !traceComplete)
604  nextRead = true;
605 
606  // If cache is not blocked, schedule an event for the first execTick in
607  // readyList else retry from cache will schedule the event. If the ready
608  // list is empty then check if the next pending node has resources
609  // available to issue. If yes, then schedule an event for the next cycle.
610  if (!readyList.empty()) {
611  Tick next_event_tick = std::max(readyList.begin()->execTick,
612  curTick());
613  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
614  next_event_tick);
615  owner.schedDcacheNextEvent(next_event_tick);
616  } else if (readyList.empty() && !depFreeQueue.empty() &&
618  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
619  owner.clockEdge(Cycles(1)));
620  owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
621  }
622 
623  // If trace is completely read, readyList is empty and depGraph is empty,
624  // set execComplete to true
625  if (depGraph.empty() && readyList.empty() && traceComplete &&
627  DPRINTF(TraceCPUData, "\tExecution Complete!\n");
628  execComplete = true;
629  dataLastTick = curTick();
630  }
631 }
632 
633 PacketPtr
// Build and send a timing memory request for a load/store node. Strictly
// ordered requests are skipped (counted and treated as sent); requests
// spanning two cache lines are truncated to the first line. Returns
// nullptr on success (or skip) and the packet pointer when the send
// failed so the caller can retry it.
// NOTE(review): the extraction dropped the signature line (internal 634,
// presumably TraceCPU::ElasticDataGen::executeMemReq(GraphNode*
// node_ptr)) and two interior lines (internal 690 and 700, presumably
// the numSendAttempted / numSendSucceeded stat increments registered in
// regStats) -- confirm against the upstream file.
635 {
636 
637  DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
638  "virt addr %d, pc %#x, size %d, flags %d).\n",
639  node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
640  node_ptr->pc, node_ptr->size, node_ptr->flags);
641 
642  // If the request is strictly ordered, do not send it. Just return nullptr
643  // as if it was succesfully sent.
644  if (node_ptr->isStrictlyOrdered()) {
645  node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
646  DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
647  node_ptr->seqNum);
648  return nullptr;
649  }
650 
651  // Check if the request spans two cache lines as this condition triggers
652  // an assert fail in the L1 cache. If it does then truncate the size to
653  // access only until the end of that line and ignore the remainder. The
654  // stat counting this is useful to keep a check on how frequently this
655  // happens. If required the code could be revised to mimick splitting such
656  // a request into two.
657  unsigned blk_size = owner.cacheLineSize();
658  Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
659  if (!(blk_offset + node_ptr->size <= blk_size)) {
660  node_ptr->size = blk_size - blk_offset;
661  ++numSplitReqs;
662  }
663 
664  // Create a request and the packet containing request
665  Request* req = new Request(node_ptr->physAddr, node_ptr->size,
666  node_ptr->flags, masterID, node_ptr->seqNum,
667  ContextID(0));
668  req->setPC(node_ptr->pc);
669  // If virtual address is valid, set the asid and virtual address fields
670  // of the request.
671  if (node_ptr->virtAddr != 0) {
672  req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
673  node_ptr->flags, masterID, node_ptr->pc);
674  req->setPaddr(node_ptr->physAddr);
675  req->setReqInstSeqNum(node_ptr->seqNum);
676  }
677 
678  PacketPtr pkt;
679  uint8_t* pkt_data = new uint8_t[req->getSize()];
680  if (node_ptr->isLoad()) {
681  pkt = Packet::createRead(req);
682  } else {
683  pkt = Packet::createWrite(req);
684  memset(pkt_data, 0xA, req->getSize());
685  }
686  pkt->dataDynamic(pkt_data);
687 
688  // Call MasterPort method to send a timing request for this packet
689  bool success = port.sendTimingReq(pkt);
691 
692  if (!success) {
693  // If it fails, return the packet to retry when a retry is signalled by
694  // the cache
695  ++numSendFailed;
696  DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
697  return pkt;
698  } else {
699  // It is succeeds, return nullptr
701  return nullptr;
702  }
703 }
704 
705 bool
// Try to issue a dependency-free node: if hardware resources (ROB and
// load/store buffer entries) are available, place it on the sorted ready
// list with its computed execute tick and occupy the resources, returning
// true. Otherwise queue it in depFreeQueue (first attempt only) and
// return false.
// NOTE(review): the signature line (internal 706) was dropped by the
// extraction; presumably
// TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr,
// bool first) given the parameters used below -- confirm upstream.
707 {
708  // Assert the node is dependency-free
709  assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
710 
711  // If this is the first attempt, print a debug message to indicate this.
712  if (first) {
713  DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
714  " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
715  node_ptr->robNum);
716  }
717 
718  // Check if resources are available to issue the specific node
719  if (hwResource.isAvailable(node_ptr)) {
720  // If resources are free only then add to readyList
721  DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
722  " to readyList, occupying resources.\n", node_ptr->seqNum);
723  // Compute the execute tick by adding the compute delay for the node
724  // and add the ready node to the ready list
725  addToSortedReadyList(node_ptr->seqNum,
726  owner.clockEdge() + node_ptr->compDelay);
727  // Account for the resources taken up by this issued node.
728  hwResource.occupy(node_ptr);
729  return true;
730 
731  } else {
732  if (first) {
733  // Although dependencies are complete, resources are not available.
734  DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
735  " Adding to depFreeQueue.\n", node_ptr->seqNum);
736  depFreeQueue.push(node_ptr);
737  } else {
738  DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
739  "Still pending issue.\n", node_ptr->seqNum);
740  }
741  return false;
742  }
743 }
744 
745 void
// Handle a memory response: for a write, only resource bookkeeping is
// needed (stores model no completion dependencies); for a load, release
// its hardware resources, wake all dependents, delete the node from the
// dependency graph, and reschedule the next dcache event if not waiting
// on a retry.
// NOTE(review): the extraction dropped the signature line (internal 746,
// presumably TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt))
// and internal line 751 inside the write branch (presumably the
// store-completion bookkeeping described by the surrounding comments) --
// confirm against the upstream file.
747 {
748  // Release the resources for this completed node.
749  if (pkt->isWrite()) {
750  // Consider store complete.
752  // If it is a store response then do nothing since we do not model
753  // dependencies on store completion in the trace. But if we were
754  // blocking execution due to store buffer fullness, we need to schedule
755  // an event and attempt to progress.
756  } else {
757  // If it is a load response then release the dependents waiting on it.
758  // Get pointer to the completed load
759  auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
760  assert(graph_itr != depGraph.end());
761  GraphNode* node_ptr = graph_itr->second;
762 
763  // Release resources occupied by the load
764  hwResource.release(node_ptr);
765 
766  DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
767  " dependents..\n", node_ptr->seqNum);
768 
769  for (auto child : node_ptr->dependents) {
770  if (child->removeDepOnInst(node_ptr->seqNum)) {
771  checkAndIssue(child);
772  }
773  }
774 
775  // clear the dynamically allocated set of dependents
776  (node_ptr->dependents).clear();
777  // Update the stat for numOps completed
778  owner.updateNumOps(node_ptr->robNum);
779  // delete node
780  delete node_ptr;
781  // remove from graph
782  depGraph.erase(graph_itr);
783  }
784 
785  if (DTRACE(TraceCPUData)) {
786  printReadyList();
787  }
788 
789  // If the size of the dependency graph is less than the dependency window
790  // then read from the trace file to populate the graph next time we are in
791  // execute.
792  if (depGraph.size() < windowSize && !traceComplete)
793  nextRead = true;
794 
795  // If not waiting for retry, attempt to schedule next event
796  if (!retryPkt) {
797  // We might have new dep-free nodes in the list which will have execute
798  // tick greater than or equal to curTick. But a new dep-free node might
799  // have its execute tick earlier. Therefore, attempt to reschedule. It
800  // could happen that the readyList is empty and we got here via a
801  // last remaining response. So, either the trace is complete or there
802  // are pending nodes in the depFreeQueue. The checking is done in the
803  // execute() control flow, so schedule an event to go via that flow.
804  Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
805  std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
806  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
807  next_event_tick);
808  owner.schedDcacheNextEvent(next_event_tick);
809  }
810 }
811 
812 void
// Insert a ready node into readyList, keeping the list sorted by execute
// tick and, for equal ticks, by ascending sequence number. A node whose
// packet is awaiting retry is pinned at the head of the list.
// NOTE(review): the extraction dropped the first signature line
// (internal 813, presumably
// TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,)
// and the trailing argument of the two maxReadyListSize updates
// (internal 828 and 866, presumably maxReadyListSize.value()) --
// confirm against the upstream file.
814  Tick exec_tick)
815 {
816  ReadyNode ready_node;
817  ready_node.seqNum = seq_num;
818  ready_node.execTick = exec_tick;
819 
820  // Iterator to readyList
821  auto itr = readyList.begin();
822 
823  // If the readyList is empty, simply insert the new node at the beginning
824  // and return
825  if (itr == readyList.end()) {
826  readyList.insert(itr, ready_node);
827  maxReadyListSize = std::max<double>(readyList.size(),
829  return;
830  }
831 
832  // If the new node has its execution tick equal to the first node in the
833  // list then go to the next node. If the first node in the list failed
834  // to execute, its position as the first is thus maintained.
835  if (retryPkt)
836  if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
837  itr++;
838 
839  // Increment the iterator and compare the node pointed to by it to the new
840  // node till the position to insert the new node is found.
841  bool found = false;
842  while (!found && itr != readyList.end()) {
843  // If the execution tick of the new node is less than the node then
844  // this is the position to insert
845  if (exec_tick < itr->execTick)
846  found = true;
847  // If the execution tick of the new node is equal to the node then
848  // sort in ascending order of sequence numbers
849  else if (exec_tick == itr->execTick) {
850  // If the sequence number of the new node is less than the node
851  // then this is the position to insert
852  if (seq_num < itr->seqNum)
853  found = true;
854  // Else go to next node
855  else
856  itr++;
857  }
858  // If the execution tick of the new node is greater than the node then
859  // go to the next node
860  else
861  itr++;
862  }
863  readyList.insert(itr, ready_node);
864  // Update the stat for max size reached of the readyList
865  maxReadyListSize = std::max<double>(readyList.size(),
867 }
868 
869 void
// Debug helper: dump each entry of readyList (sequence number, node type
// and execute tick) to the TraceCPUData debug stream.
// NOTE(review): the signature line (internal 870) was dropped by the
// extraction; presumably TraceCPU::ElasticDataGen::printReadyList() { --
// confirm upstream.
871 
872  auto itr = readyList.begin();
873  if (itr == readyList.end()) {
874  DPRINTF(TraceCPUData, "readyList is empty.\n");
875  return;
876  }
877  DPRINTF(TraceCPUData, "Printing readyList:\n");
878  while (itr != readyList.end()) {
879  auto graph_itr = depGraph.find(itr->seqNum);
880  GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
881  DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
882  node_ptr->typeToStr(), itr->execTick);
883  itr++;
884  }
885 }
886 
// Construct the hardware-resource model with the configured ROB, store
// buffer and load buffer capacities; no node is in flight initially, so
// the oldest in-flight ROB number starts at its sentinel UINT64_MAX.
// NOTE(review): the first signature line (internal 887) was dropped by
// the extraction; presumably
// TraceCPU::ElasticDataGen::HardwareResource::HardwareResource( --
// confirm upstream.
888  uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
889  : sizeROB(max_rob),
890  sizeStoreBuffer(max_stores),
891  sizeLoadBuffer(max_loads),
892  oldestInFlightRobNum(UINT64_MAX),
893  numInFlightLoads(0),
894  numInFlightStores(0)
895 {}
896 
897 void
// Account for the resources consumed by a newly issued node: record it in
// the in-flight map (keyed by sequence number, ordered so the first entry
// is the numerically smallest robNum) and bump the load or store buffer
// occupancy when applicable.
// NOTE(review): the signature line (internal 898) was dropped by the
// extraction; presumably
// TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode*
// new_node) given the parameter used below -- confirm upstream.
899 {
900  // Occupy ROB entry for the issued node
901  // Merely maintain the oldest node, i.e. numerically least robNum by saving
902  // it in the variable oldestInFLightRobNum.
903  inFlightNodes[new_node->seqNum] = new_node->robNum;
904  oldestInFlightRobNum = inFlightNodes.begin()->second;
905 
906  // Occupy Load/Store Buffer entry for the issued node if applicable
907  if (new_node->isLoad()) {
908  ++numInFlightLoads;
909  } else if (new_node->isStore()) {
910  ++numInFlightStores;
911  } // else if it is a non load/store node, no buffer entry is occupied
912 
913  printOccupancy();
914 }
915 
916 void
// Release the resources held by a completed node: drop it from the
// in-flight map, recompute the oldest in-flight ROB number (resetting to
// the UINT64_MAX sentinel when nothing is left in flight), and free the
// load buffer entry for loads or -- for strictly ordered stores only --
// the store buffer entry.
// NOTE(review): the signature line (internal 917) was dropped by the
// extraction; presumably
// TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode*
// done_node) given the parameter used below -- confirm upstream.
918 {
919  assert(!inFlightNodes.empty());
920  DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
921  done_node->seqNum);
922 
923  assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
924  inFlightNodes.erase(done_node->seqNum);
925 
926  if (inFlightNodes.empty()) {
927  // If we delete the only in-flight node and then the
928  // oldestInFlightRobNum is set to it's initialized (max) value.
929  oldestInFlightRobNum = UINT64_MAX;
930  } else {
931  // Set the oldest in-flight node rob number equal to the first node in
932  // the inFlightNodes since that will have the numerically least value.
933  oldestInFlightRobNum = inFlightNodes.begin()->second;
934  }
935 
936  DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
937  "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
938  oldestInFlightRobNum);
939 
940  // A store is considered complete when a request is sent, thus ROB entry is
941  // freed. But it occupies an entry in the Store Buffer until its response
942  // is received. A load is considered complete when a response is received,
943  // thus both ROB and Load Buffer entries can be released.
944  if (done_node->isLoad()) {
945  assert(numInFlightLoads != 0);
946  --numInFlightLoads;
947  }
948  // For normal writes, we send the requests out and clear a store buffer
949  // entry on response. For writes which are strictly ordered, for e.g.
950  // writes to device registers, we do that within release() which is called
951  // when node is executed and taken off from readyList.
952  if (done_node->isStore() && done_node->isStrictlyOrdered()) {
953  releaseStoreBuffer();
954  }
955 }
956 
957 void
959 {
960  assert(numInFlightStores != 0);
961  --numInFlightStores;
962 }
963 
964 bool
966  const GraphNode* new_node) const
967 {
968  uint16_t num_in_flight_nodes;
969  if (inFlightNodes.empty()) {
970  num_in_flight_nodes = 0;
971  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
972  " #in-flight nodes = 0", new_node->seqNum);
973  } else if (new_node->robNum > oldestInFlightRobNum) {
974  // This is the intuitive case where new dep-free node is younger
975  // instruction than the oldest instruction in-flight. Thus we make sure
976  // in_flight_nodes does not overflow.
977  num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
978  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
979  " #in-flight nodes = %d - %d = %d", new_node->seqNum,
980  new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
981  } else {
982  // This is the case where an instruction older than the oldest in-
983  // flight instruction becomes dep-free. Thus we must have already
984  // accounted for the entry in ROB for this new dep-free node.
985  // Immediately after this check returns true, oldestInFlightRobNum will
986  // be updated in occupy(). We simply let this node issue now.
987  num_in_flight_nodes = 0;
988  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
989  " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
990  new_node->seqNum, new_node->robNum);
991  }
992  DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
993  numInFlightLoads, sizeLoadBuffer,
994  numInFlightStores, sizeStoreBuffer);
995  // Check if resources are available to issue the specific node
996  if (num_in_flight_nodes >= sizeROB) {
997  return false;
998  }
999  if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
1000  return false;
1001  }
1002  if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
1003  return false;
1004  }
1005  return true;
1006 }
1007 
1008 bool
1010  // Return true if there is at least one read or write request in flight
1011  return (numInFlightStores != 0 || numInFlightLoads != 0);
1012 }
1013 
1014 void
1016  DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
1017  "LQ = %d/%d, SQ = %d/%d.\n",
1018  oldestInFlightRobNum,
1019  numInFlightLoads, sizeLoadBuffer,
1020  numInFlightStores, sizeStoreBuffer);
1021 }
1022 
1023 void
1025 {
1026  using namespace Stats;
1027 
1029  .name(name() + ".numSendAttempted")
1030  .desc("Number of first attempts to send a request")
1031  ;
1032 
1034  .name(name() + ".numSendSucceeded")
1035  .desc("Number of successful first attempts")
1036  ;
1037 
1039  .name(name() + ".numSendFailed")
1040  .desc("Number of failed first attempts")
1041  ;
1042 
1044  .name(name() + ".numRetrySucceeded")
1045  .desc("Number of successful retries")
1046  ;
1047 
1048  instLastTick
1049  .name(name() + ".instLastTick")
1050  .desc("Last tick simulated from the fixed inst trace")
1051  ;
1052 }
1053 
1054 Tick
1056 {
1057  DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1058  " IcacheGen: fixed issue with retry.\n");
1059 
1060  if (nextExecute()) {
1061  DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1062  return currElement.tick;
1063  } else {
1064  panic("Read of first message in the trace failed.\n");
1065  return MaxTick;
1066  }
1067 }
1068 
1069 bool
1071 {
1072  // If there is a retry packet, try to send it
1073  if (retryPkt) {
1074 
1075  DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1076 
1077  if (!port.sendTimingReq(retryPkt)) {
1078  // Still blocked! This should never occur.
1079  DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1080  return false;
1081  }
1083  } else {
1084 
1085  DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1086 
1087  // try sending current element
1088  assert(currElement.isValid());
1089 
1090  ++numSendAttempted;
1091 
1092  if (!send(currElement.addr, currElement.blocksize,
1093  currElement.cmd, currElement.flags, currElement.pc)) {
1094  DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1095  ++numSendFailed;
1096  // return false to indicate not to schedule next event
1097  return false;
1098  } else {
1099  ++numSendSucceeded;
1100  }
1101  }
1102  // If packet was sent successfully, either retryPkt or currElement, return
1103  // true to indicate to schedule event at current Tick plus delta. If packet
1104  // was sent successfully and there is no next packet to send, return false.
1105  DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1106  "element.\n");
1107  retryPkt = nullptr;
1108  // Read next element into currElement, currElement gets cleared so save the
1109  // tick to calculate delta
1110  Tick last_tick = currElement.tick;
1111  if (nextExecute()) {
1112  assert(currElement.tick >= last_tick);
1113  delta = currElement.tick - last_tick;
1114  }
1115  return !traceComplete;
1116 }
1117 
1118 void
1120 {
1121  trace.reset();
1122 }
1123 
1124 bool
1126 {
1127  if (traceComplete)
1128  // We are at the end of the file, thus we have no more messages.
1129  // Return false.
1130  return false;
1131 
1132 
1133  //Reset the currElement to the default values
1134  currElement.clear();
1135 
1136  // Read the next line to get the next message. If that fails then end of
1137  // trace has been reached and traceComplete needs to be set in addition
1138  // to returning false. If successful then next message is in currElement.
1139  if (!trace.read(&currElement)) {
1140  traceComplete = true;
1141  instLastTick = curTick();
1142  return false;
1143  }
1144 
1145  DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1146  currElement.cmd.isRead() ? 'r' : 'w',
1147  currElement.addr,
1148  currElement.pc,
1149  currElement.blocksize,
1150  currElement.tick);
1151 
1152  return true;
1153 }
1154 
1155 bool
1157  Request::FlagsType flags, Addr pc)
1158 {
1159 
1160  // Create new request
1161  Request* req = new Request(addr, size, flags, masterID);
1162  req->setPC(pc);
1163 
1164  // If this is not done it triggers assert in L1 cache for invalid contextId
1165  req->setContext(ContextID(0));
1166 
1167  // Embed it in a packet
1168  PacketPtr pkt = new Packet(req, cmd);
1169 
1170  uint8_t* pkt_data = new uint8_t[req->getSize()];
1171  pkt->dataDynamic(pkt_data);
1172 
1173  if (cmd.isWrite()) {
1174  memset(pkt_data, 0xA, req->getSize());
1175  }
1176 
1177  // Call MasterPort method to send a timing request for this packet
1178  bool success = port.sendTimingReq(pkt);
1179  if (!success) {
1180  // If it fails, save the packet to retry when a retry is signalled by
1181  // the cache
1182  retryPkt = pkt;
1183  }
1184  return success;
1185 }
1186 
1187 void
1189 {
1190  // Schedule an event to go through the control flow in the same tick as
1191  // retry is received
1192  DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1193  " event @%lli.\n", curTick());
1194  schedule(icacheNextEvent, curTick());
1195 }
1196 
1197 void
1199 {
1200  // Schedule an event to go through the execute flow in the same tick as
1201  // retry is received
1202  DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1203  " event @%lli.\n", curTick());
1204  schedule(dcacheNextEvent, curTick());
1205 }
1206 
1207 void
1209 {
1210  if (!dcacheNextEvent.scheduled()) {
1211  DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1212  when);
1213  schedule(dcacheNextEvent, when);
1215  } else if (when < dcacheNextEvent.when()) {
1216  DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1217  " to %lli.\n", dcacheNextEvent.when(), when);
1218  reschedule(dcacheNextEvent, when);
1219  }
1220 
1221 }
1222 
1223 bool
1225 {
1226  // All responses on the instruction fetch side are ignored. Simply delete
1227  // the request and packet to free allocated memory
1228  delete pkt->req;
1229  delete pkt;
1230 
1231  return true;
1232 }
1233 
1234 void
1236 {
1238 }
1239 
1240 void
1242 {
1243  DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1245 }
1246 
1247 bool
1249 {
1250  // Handle the responses for data memory requests which is done inside the
1251  // elastic data generator
1253  // After processing the response delete the request and packet to free
1254  // memory
1255  delete pkt->req;
1256  delete pkt;
1257 
1258  return true;
1259 }
1260 
1261 void
1263 {
1265 }
1266 
1268  const std::string& filename,
1269  const double time_multiplier)
1270  : trace(filename),
1271  timeMultiplier(time_multiplier),
1272  microOpCount(0)
1273 {
1274  // Create a protobuf message for the header and read it from the stream
1275  ProtoMessage::InstDepRecordHeader header_msg;
1276  if (!trace.read(header_msg)) {
1277  panic("Failed to read packet header from %s\n", filename);
1278 
1279  if (header_msg.tick_freq() != SimClock::Frequency) {
1280  panic("Trace %s was recorded with a different tick frequency %d\n",
1281  header_msg.tick_freq());
1282  }
1283  } else {
1284  // Assign window size equal to the field in the trace that was recorded
1285  // when the data dependency trace was captured in the o3cpu model
1286  windowSize = header_msg.window_size();
1287  }
1288 }
1289 
1290 void
1292 {
1293  trace.reset();
1294 }
1295 
1296 bool
1298 {
1299  ProtoMessage::InstDepRecord pkt_msg;
1300  if (trace.read(pkt_msg)) {
1301  // Required fields
1302  element->seqNum = pkt_msg.seq_num();
1303  element->type = pkt_msg.type();
1304  // Scale the compute delay to effectively scale the Trace CPU frequency
1305  element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1306 
1307  // Repeated field robDepList
1308  element->clearRobDep();
1309  assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1310  for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1311  element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1312  element->numRobDep += 1;
1313  }
1314 
1315  // Repeated field
1316  element->clearRegDep();
1317  assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1318  for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1319  // There is a possibility that an instruction has both, a register
1320  // and order dependency on an instruction. In such a case, the
1321  // register dependency is omitted
1322  bool duplicate = false;
1323  for (int j = 0; j < element->numRobDep; j++) {
1324  duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1325  }
1326  if (!duplicate) {
1327  element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1328  element->numRegDep += 1;
1329  }
1330  }
1331 
1332  // Optional fields
1333  if (pkt_msg.has_p_addr())
1334  element->physAddr = pkt_msg.p_addr();
1335  else
1336  element->physAddr = 0;
1337 
1338  if (pkt_msg.has_v_addr())
1339  element->virtAddr = pkt_msg.v_addr();
1340  else
1341  element->virtAddr = 0;
1342 
1343  if (pkt_msg.has_asid())
1344  element->asid = pkt_msg.asid();
1345  else
1346  element->asid = 0;
1347 
1348  if (pkt_msg.has_size())
1349  element->size = pkt_msg.size();
1350  else
1351  element->size = 0;
1352 
1353  if (pkt_msg.has_flags())
1354  element->flags = pkt_msg.flags();
1355  else
1356  element->flags = 0;
1357 
1358  if (pkt_msg.has_pc())
1359  element->pc = pkt_msg.pc();
1360  else
1361  element->pc = 0;
1362 
1363  // ROB occupancy number
1364  ++microOpCount;
1365  if (pkt_msg.has_weight()) {
1366  microOpCount += pkt_msg.weight();
1367  }
1368  element->robNum = microOpCount;
1369  return true;
1370  }
1371 
1372  // We have reached the end of the file
1373  return false;
1374 }
1375 
1376 bool
1378 {
1379  for (auto& own_reg_dep : regDep) {
1380  if (own_reg_dep == reg_dep) {
1381  // If register dependency is found, make it zero and return true
1382  own_reg_dep = 0;
1383  assert(numRegDep > 0);
1384  --numRegDep;
1385  DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1386  "done.\n", seqNum, reg_dep);
1387  return true;
1388  }
1389  }
1390 
1391  // Return false if the dependency is not found
1392  return false;
1393 }
1394 
1395 bool
1397 {
1398  for (auto& own_rob_dep : robDep) {
1399  if (own_rob_dep == rob_dep) {
1400  // If the rob dependency is found, make it zero and return true
1401  own_rob_dep = 0;
1402  assert(numRobDep > 0);
1403  --numRobDep;
1404  DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1405  "done.\n", seqNum, rob_dep);
1406  return true;
1407  }
1408  }
1409  return false;
1410 }
1411 
1412 void
1414  for (auto& own_reg_dep : regDep) {
1415  own_reg_dep = 0;
1416  }
1417  numRegDep = 0;
1418 }
1419 
1420 void
1422  for (auto& own_rob_dep : robDep) {
1423  own_rob_dep = 0;
1424  }
1425  numRobDep = 0;
1426 }
1427 
1428 bool
1430 {
1431  // If it is an rob dependency then remove it
1432  if (!removeRobDep(done_seq_num)) {
1433  // If it is not an rob dependency then it must be a register dependency
1434  // If the register dependency is not found, it violates an assumption
1435  // and must be caught by assert.
1436  bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1437  assert(regdep_found);
1438  }
1439  // Return true if the node is dependency free
1440  return (numRobDep == 0 && numRegDep == 0);
1441 }
1442 
1443 void
1445 {
1446  DPRINTFR(TraceCPUData, "%lli", seqNum);
1447  DPRINTFR(TraceCPUData, ",%s", typeToStr());
1448  if (isLoad() || isStore()) {
1449  DPRINTFR(TraceCPUData, ",%i", physAddr);
1450  DPRINTFR(TraceCPUData, ",%i", size);
1451  DPRINTFR(TraceCPUData, ",%i", flags);
1452  }
1453  DPRINTFR(TraceCPUData, ",%lli", compDelay);
1454  int i = 0;
1455  DPRINTFR(TraceCPUData, "robDep:");
1456  while (robDep[i] != 0) {
1457  DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1458  i++;
1459  }
1460  i = 0;
1461  DPRINTFR(TraceCPUData, "regDep:");
1462  while (regDep[i] != 0) {
1463  DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1464  i++;
1465  }
1466  auto child_itr = dependents.begin();
1467  DPRINTFR(TraceCPUData, "dependents:");
1468  while (child_itr != dependents.end()) {
1469  DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1470  child_itr++;
1471  }
1472 
1473  DPRINTFR(TraceCPUData, "\n");
1474 }
1475 
1476 std::string
1478 {
1479  return Record::RecordType_Name(type);
1480 }
1481 
1483  : trace(filename)
1484 {
1485  // Create a protobuf message for the header and read it from the stream
1486  ProtoMessage::PacketHeader header_msg;
1487  if (!trace.read(header_msg)) {
1488  panic("Failed to read packet header from %s\n", filename);
1489 
1490  if (header_msg.tick_freq() != SimClock::Frequency) {
1491  panic("Trace %s was recorded with a different tick frequency %d\n",
1492  header_msg.tick_freq());
1493  }
1494  }
1495 }
1496 
1497 void
1499 {
1500  trace.reset();
1501 }
1502 
1503 bool
1505 {
1506  ProtoMessage::Packet pkt_msg;
1507  if (trace.read(pkt_msg)) {
1508  element->cmd = pkt_msg.cmd();
1509  element->addr = pkt_msg.addr();
1510  element->blocksize = pkt_msg.size();
1511  element->tick = pkt_msg.tick();
1512  element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1513  element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1514  return true;
1515  }
1516 
1517  // We have reached the end of the file
1518  return false;
1519 }
InputStream trace
Input stream used for reading the input trace file.
Definition: trace_cpu.hh:1000
void execute()
This is the main execute function which consumes nodes from the sorted readyList. ...
Definition: trace_cpu.cc:440
Struct to store a ready-to-execute node and its execution tick.
Definition: trace_cpu.hh:685
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:677
void schedDcacheNext()
This is the control flow that uses the functionality of the dcacheGen to replay the trace...
Definition: trace_cpu.cc:191
#define DPRINTF(x,...)
Definition: trace.hh:212
MasterPort & getDataPort()
Used to get a reference to the dcache port.
Definition: trace_cpu.hh:1152
Stats::Scalar maxReadyListSize
Definition: trace_cpu.hh:1051
const uint64_t progressMsgInterval
Interval of committed instructions specified by the user at which a progress info message is printed...
Definition: trace_cpu.hh:1129
bool send(Addr addr, unsigned size, const MemCmd &cmd, Request::FlagsType flags, Addr pc)
Creates a new request assigning the request parameters passed by the arguments.
Definition: trace_cpu.cc:1156
Addr blocksize
The size of the access for the request.
Definition: trace_cpu.hh:360
void setPC(Addr pc)
Definition: request.hh:701
Stats::Scalar dataLastTick
Tick when ElasticDataGen completes execution.
Definition: trace_cpu.hh:1060
TraceCPU & owner
Reference of the TraceCPU.
Definition: trace_cpu.hh:991
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
NodeSeqNum seqNum
The sequence number of the ready node.
Definition: trace_cpu.hh:688
void exit()
Exit the FixedRetryGen.
Definition: trace_cpu.cc:1119
Definition: packet.hh:73
void recvReqRetry()
Handle a retry signalled by the cache if data access failed in the first attempt. ...
Definition: trace_cpu.cc:1262
uint32_t windowSize
The window size that is read from the header of the protobuf trace and used to process the dependency...
Definition: trace_cpu.hh:818
bool isTraceComplete()
Returns the traceComplete variable which is set when end of the input trace file is reached...
Definition: trace_cpu.hh:496
const std::string & name()
Definition: trace.cc:49
Bitfield< 7 > i
Definition: miscregs.hh:1378
bool removeRegDep(NodeSeqNum reg_dep)
Remove completed instruction from register dependency array.
Definition: trace_cpu.cc:1377
#define panic(...)
Definition: misc.hh:153
std::list< ReadyNode > readyList
List of nodes that are ready to execute.
Definition: trace_cpu.hh:1047
Request::Flags flags
Request flags if any.
Definition: trace_cpu.hh:616
RecordType type
Type of the node corresponding to the instruction modelled by it.
Definition: trace_cpu.hh:601
PacketPtr retryPkt
PacketPtr used to store the packet to retry.
Definition: trace_cpu.hh:1006
Stats::Scalar numSchedIcacheEvent
Definition: trace_cpu.hh:1139
uint32_t FlagsType
Definition: request.hh:90
bool nextExecute()
Reads a line of the trace file.
Definition: trace_cpu.cc:1125
void updateNumOps(uint64_t rob_num)
Definition: trace_cpu.cc:96
~TraceCPU()
Definition: trace_cpu.cc:84
HardwareResource(uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
Constructor that initializes the sizes of the structures.
Definition: trace_cpu.cc:887
PacketPtr executeMemReq(GraphNode *node_ptr)
Creates a new request for a load or store assigning the request parameters.
Definition: trace_cpu.cc:634
Request::FlagsType flags
Potential request flags to use.
Definition: trace_cpu.hh:366
Stats::Scalar numSOStores
Definition: trace_cpu.hh:1058
void completeMemAccess(PacketPtr pkt)
When a load writeback is received, that is when the load completes, release the dependents on it...
Definition: trace_cpu.cc:746
physAddr
Definition: misc.hh:832
ip6_addr_t addr
Definition: inet.hh:335
bool isWrite() const
Definition: packet.hh:503
void init()
Definition: trace_cpu.cc:123
Tick traceOffset
This stores the time offset in the trace, which is taken away from the ready times of requests...
Definition: trace_cpu.hh:1102
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:381
std::string instTraceFile
File names for input instruction and data traces.
Definition: trace_cpu.hh:335
void exit()
Exit the ElasticDataGen.
Definition: trace_cpu.cc:351
void regStats()
Definition: trace_cpu.cc:226
uint32_t asid
The address space id which is set if the virtual address is set.
Definition: trace_cpu.hh:610
static const uint8_t maxRobDep
The maximum no.
Definition: trace_cpu.hh:586
bool recvTimingResp(PacketPtr pkt)
Receive the timing response and simply delete the packet since instruction fetch requests are issued a...
Definition: trace_cpu.cc:1224
void checkAndSchedExitEvent()
This is called when either generator finishes executing from the trace.
Definition: trace_cpu.cc:205
bool read(google::protobuf::Message &msg)
Read a message from the stream.
Definition: protoio.cc:177
void setContext(ContextID context_id)
Set up Context numbers.
Definition: request.hh:449
bool oneTraceComplete
Set to true when one of the generators finishes replaying its trace.
Definition: trace_cpu.hh:1094
void schedIcacheNext()
This is the control flow that uses the functionality of the icacheGen to replay the trace...
Definition: trace_cpu.cc:166
Stats::Scalar numSchedDcacheEvent
Definition: trace_cpu.hh:1138
MemCmd cmd
Specifies if the request is to be a read or a write.
Definition: trace_cpu.hh:354
Bitfield< 23, 0 > offset
Definition: types.hh:149
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the slave port by calling its corresponding receive function...
Definition: port.cc:180
void clearRobDep()
Initialize register dependency array to all zeroes.
Definition: trace_cpu.cc:1421
uint64_t NodeSeqNum
Node sequence number type.
Definition: trace_cpu.hh:564
bool readNextWindow()
Reads a line of the trace file.
Definition: trace_cpu.cc:357
void reset()
Reset the stream such that it can be played once again.
Definition: trace_cpu.cc:1291
Stats::Scalar maxDependents
Stats for data memory accesses replayed.
Definition: trace_cpu.hh:1050
const int MaxInstSrcRegs
Definition: registers.hh:56
uint8_t numRegDep
Number of register dependencies.
Definition: trace_cpu.hh:637
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition: core.cc:47
bool execComplete
Set true when execution of trace is complete.
Definition: trace_cpu.hh:1015
void addDepsOnParent(GraphNode *new_node, T &dep_array, uint8_t &num_dep)
Iterate over the dependencies of a new node and add the new node to the list of dependents of the par...
Definition: trace_cpu.cc:410
Stats::Scalar numSendSucceeded
Definition: trace_cpu.hh:1053
void reset()
Reset the stream such that it can be played once again.
Definition: trace_cpu.cc:1498
EventWrapper< TraceCPU,&TraceCPU::schedIcacheNext > icacheNextEvent
Event for the control flow method schedIcacheNext()
Definition: trace_cpu.hh:1085
A BaseSlavePort is a protocol-agnostic slave port, responsible only for the structural connection to ...
Definition: port.hh:139
bool traceComplete
Set to true when end of trace is reached.
Definition: trace_cpu.hh:1009
Tick init()
Called from TraceCPU init().
Definition: trace_cpu.cc:314
void clearRegDep()
Initialize register dependency array to all zeroes.
Definition: trace_cpu.cc:1413
const uint32_t windowSize
Window size within which to check for dependencies.
Definition: trace_cpu.hh:1026
std::queue< const GraphNode * > depFreeQueue
Queue of dependency-free nodes that are pending issue because resources are not available.
Definition: trace_cpu.hh:1044
bool isWrite() const
Definition: packet.hh:189
bool isAvailable(const GraphNode *new_node) const
Check if structures required to issue a node are free.
Definition: trace_cpu.cc:965
ProtoInputStream trace
Input file stream for the protobuf trace.
Definition: trace_cpu.hh:801
CountedExitEvent * execCompleteEvent
A CountedExitEvent which when serviced decrements the counter.
Definition: trace_cpu.hh:1117
system
Definition: isa.cc:226
const Tick MaxTick
Definition: types.hh:65
Tick curTick()
The current simulated tick.
Definition: core.hh:47
bool isExecComplete() const
Returns the execComplete variable which is set when the last node is executed.
Definition: trace_cpu.hh:969
#define DTRACE(x)
Definition: trace.hh:210
The trace cpu replays traces generated using the elastic trace probe attached to the O3 CPU model...
Definition: trace_cpu.hh:144
uint64_t progressMsgThreshold
Definition: trace_cpu.hh:1136
NodeRobNum robNum
ROB occupancy number.
Definition: trace_cpu.hh:598
bool isStrictlyOrdered() const
Return true if node has a request which is strictly ordered.
Definition: trace_cpu.hh:671
TraceCPU(TraceCPUParams *params)
Definition: trace_cpu.cc:49
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:397
uint64_t Tick
Tick count type.
Definition: types.hh:63
const bool enableEarlyExit
Exit when any one Trace CPU completes its execution.
Definition: trace_cpu.hh:1123
The struct GraphNode stores an instruction in the trace file.
Definition: trace_cpu.hh:578
bool nextRead
Set to true when the next window of instructions need to be read.
Definition: trace_cpu.hh:1012
This struct stores a line in the trace file.
Definition: trace_cpu.hh:351
const RequestPtr req
A pointer to the original request.
Definition: packet.hh:304
void dcacheRetryRecvd()
When data cache port receives a retry, schedule event dcacheNextEvent.
Definition: trace_cpu.cc:1198
FixedRetryGen icacheGen
Instance of FixedRetryGen to replay instruction read requests.
Definition: trace_cpu.hh:1064
void recvReqRetry()
Handle a retry signalled by the cache if instruction read failed in the first attempt.
Definition: trace_cpu.cc:1235
void writeElementAsTrace() const
Write out element in trace-compatible format using debug flag TraceCPUData.
Definition: trace_cpu.cc:1444
const std::string & name() const
Returns name of the ElasticDataGen instance.
Definition: trace_cpu.hh:893
void setReqInstSeqNum(const InstSeqNum seq_num)
Definition: request.hh:759
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
void release(const GraphNode *done_node)
Release appropriate structures for a completed node.
Definition: trace_cpu.cc:917
Derived & precision(int _precision)
Set the precision and marks this stat to print at the end of simulation.
Definition: statistics.hh:299
uint64_t compDelay
Computational delay.
Definition: trace_cpu.hh:628
MasterPort & port
Reference of the port to be used to issue memory requests.
Definition: trace_cpu.hh:994
static int numTraceCPUs
Number of Trace CPUs in the system used as a shared variable and passed to the CountedExitEvent event...
Definition: trace_cpu.hh:1110
Stats::Formula cpi
Stat for the CPI.
Definition: trace_cpu.hh:1144
void dcacheRecvTimingResp(PacketPtr pkt)
When data cache port receives a response, this calls the dcache generator method handle to complete t...
Definition: trace_cpu.cc:1241
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:245
InstSeqNum getReqInstSeqNum() const
Definition: request.hh:752
Tick execTick
The tick at which the ready node must be executed.
Definition: trace_cpu.hh:691
Bitfield< 24 > j
Definition: miscregs.hh:1369
static const int NumArgumentRegs M5_VAR_USED
Definition: process.cc:83
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:254
Stats::Scalar numOps
Stat for number of simulated micro-ops.
Definition: trace_cpu.hh:1142
bool checkAndIssue(const GraphNode *node_ptr, bool first=true)
Attempts to issue a node once the node's source dependencies are complete.
Definition: trace_cpu.cc:706
const MasterID masterID
MasterID used for the requests being sent.
Definition: trace_cpu.hh:997
Tick tick
The time at which the request should be sent.
Definition: trace_cpu.hh:363
type
Definition: misc.hh:728
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition: sim_events.cc:83
bool recvTimingResp(PacketPtr pkt)
Receive the timing response and call dcacheRecvTimingResp() method of the dcacheGen to handle completi...
Definition: trace_cpu.cc:1248
int size()
Definition: pagetable.hh:146
uint8_t numRobDep
Number of order dependencies.
Definition: trace_cpu.hh:625
Stats::Scalar numSendFailed
Definition: trace_cpu.hh:1054
void schedDcacheNextEvent(Tick when)
Schedule event dcacheNextEvent at the given tick.
Definition: trace_cpu.cc:1208
EventWrapper< TraceCPU,&TraceCPU::schedDcacheNext > dcacheNextEvent
Event for the control flow method schedDcacheNext()
Definition: trace_cpu.hh:1088
std::string dataTraceFile
Definition: trace_cpu.hh:335
Stats::Scalar numSendAttempted
Definition: trace_cpu.hh:1052
ElasticDataGen dcacheGen
Instance of ElasticDataGen to replay data read and write requests.
Definition: trace_cpu.hh:1067
bool isLoad() const
Is the node a load.
Definition: trace_cpu.hh:647
Addr virtAddr
The virtual address for the request if any.
Definition: trace_cpu.hh:607
uint32_t size
Size of request if any.
Definition: trace_cpu.hh:613
void printReadyList()
Print readyList for debugging using debug flag TraceCPUData.
Definition: trace_cpu.cc:870
void setVirt(int asid, Addr vaddr, unsigned size, Flags flags, MasterID mid, Addr pc)
Set up a virtual (e.g., CPU) request in a previously allocated Request object.
Definition: request.hh:460
InputStream(const std::string &filename, const double time_multiplier)
Create a trace input stream for a given file name.
Definition: trace_cpu.cc:1267
void releaseStoreBuffer()
Release store buffer entry for a completed store.
Definition: trace_cpu.cc:958
void adjustInitTraceOffset(Tick &offset)
Adjust traceOffset based on what TraceCPU init() determines on comparing the offsets in the fetch req...
Definition: trace_cpu.cc:344
void occupy(const GraphNode *new_node)
Occupy appropriate structures for an issued node.
Definition: trace_cpu.cc:898
RegDepArray regDep
Array of register dependencies (incoming) if any.
Definition: trace_cpu.hh:634
Stats::Scalar numRetrySucceeded
Definition: trace_cpu.hh:1055
void bind(BaseSlavePort &slave_port)
Bind this master port to a slave port.
Definition: port.cc:128
static PacketPtr createRead(const RequestPtr req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:809
std::string typeToStr() const
Return string specifying the type of the node.
Definition: trace_cpu.cc:1477
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:947
Addr addr
The address for the request.
Definition: trace_cpu.hh:357
Tick init()
Called from TraceCPU init().
Definition: trace_cpu.cc:1055
bool isStore() const
Is the node a store.
Definition: trace_cpu.hh:650
void icacheRetryRecvd()
When instruction cache port receives a retry, schedule event icacheNextEvent.
Definition: trace_cpu.cc:1188
std::unordered_map< NodeSeqNum, GraphNode * > depGraph
Store the depGraph of GraphNodes.
Definition: trace_cpu.hh:1035
IntReg pc
Definition: remote_gdb.hh:91
Addr physAddr
The address for the request if any.
Definition: trace_cpu.hh:604
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:287
bool awaitingResponse() const
Check if there are any outstanding requests, i.e. requests for which we are yet to receive a response.
Definition: trace_cpu.cc:1009
Stats::Scalar numSplitReqs
Definition: trace_cpu.hh:1056
void printOccupancy()
Print resource occupancy for debugging.
Definition: trace_cpu.cc:1015
void addToSortedReadyList(NodeSeqNum seq_num, Tick exec_tick)
Add a ready node to the readyList.
Definition: trace_cpu.cc:813
fatal_if(p->js_features.size() > 16,"Too many job slot feature registers specified (%i)\n", p->js_features.size())
static PacketPtr createWrite(const RequestPtr req)
Definition: packet.hh:815
bool read(TraceElement *element)
Attempt to read a trace element from the stream, and also notify the caller if the end of the file was reached.
Definition: trace_cpu.cc:1504
unsigned getSize() const
Definition: request.hh:552
#define inform(...)
Definition: misc.hh:221
void setPaddr(Addr paddr)
Set just the physical address.
Definition: request.hh:487
RobDepArray robDep
Array of order dependencies.
Definition: trace_cpu.hh:622
void takeOverFrom(BaseCPU *oldCPU)
Definition: trace_cpu.cc:106
int ContextID
Globally unique thread context ID.
Definition: types.hh:175
Command cmd
Definition: packet.hh:178
bool read(GraphNode *element)
Attempt to read a trace element from the stream, and also notify the caller if the end of the file was reached.
Definition: trace_cpu.cc:1297
const FlagsType init
This Stat is Initialized.
Definition: info.hh:45
NodeSeqNum seqNum
Instruction sequence number.
Definition: trace_cpu.hh:595
Stats::Scalar numSOLoads
Definition: trace_cpu.hh:1057
InputStream trace
Input stream used for reading the input trace file.
Definition: trace_cpu.hh:514
bool removeRobDep(NodeSeqNum rob_dep)
Remove completed instruction from order dependency array.
Definition: trace_cpu.cc:1396
HardwareResource hwResource
Hardware resources required to contain in-flight nodes and to throttle issuing of new nodes when resources are not available.
Definition: trace_cpu.hh:1032
bool removeDepOnInst(NodeSeqNum done_seq_num)
Check for all dependencies on completed inst.
Definition: trace_cpu.cc:1429
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:102
MasterPort & getInstPort()
Used to get a reference to the icache port.
Definition: trace_cpu.hh:1149
InputStream(const std::string &filename)
Create a trace input stream for a given file name.
Definition: trace_cpu.cc:1482
bool tryNext()
This tries to send current or retry packet and returns true if successful.
Definition: trace_cpu.cc:1070
#define DPRINTFR(...)
Definition: trace.hh:214

Generated on Fri Jun 9 2017 13:03:45 for gem5 by doxygen 1.8.6