gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
elastic_trace.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013 - 2015 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  *
37  * Authors: Radhika Jagtap
38  * Andreas Hansson
39  * Thomas Grass
40  */
41 
43 
44 #include "base/callback.hh"
45 #include "base/output.hh"
46 #include "base/trace.hh"
47 #include "cpu/reg_class.hh"
48 #include "debug/ElasticTrace.hh"
49 #include "mem/packet.hh"
50 
51 ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
52  : ProbeListenerObject(params),
53  regEtraceListenersEvent(this),
54  firstWin(true),
55  lastClearedSeqNum(0),
56  depWindowSize(params->depWindowSize),
57  dataTraceStream(nullptr),
58  instTraceStream(nullptr),
59  startTraceInst(params->startTraceInst),
60  allProbesReg(false),
61  traceVirtAddr(params->traceVirtAddr)
62 {
63  cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
64  fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
65  "support dependency tracing.\n", name());
66 
67  fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
68  "Recommended size is 3x ROB size in the O3CPU.\n");
69 
70  fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\
71  "single-threaded workload only", cpu->numThreads, name());
72  // Initialize the protobuf output stream
73  fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\
74  "trace file path to instFetchTraceFile");
75  fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\
76  "trace file path to dataDepTraceFile");
77  std::string filename = simout.resolve(name() + "." +
78  params->instFetchTraceFile);
79  instTraceStream = new ProtoOutputStream(filename);
80  filename = simout.resolve(name() + "." + params->dataDepTraceFile);
81  dataTraceStream = new ProtoOutputStream(filename);
82  // Create a protobuf message for the header and write it to the stream
83  ProtoMessage::PacketHeader inst_pkt_header;
84  inst_pkt_header.set_obj_id(name());
85  inst_pkt_header.set_tick_freq(SimClock::Frequency);
86  instTraceStream->write(inst_pkt_header);
87  // Create a protobuf message for the header and write it to
88  // the stream
89  ProtoMessage::InstDepRecordHeader data_rec_header;
90  data_rec_header.set_obj_id(name());
91  data_rec_header.set_tick_freq(SimClock::Frequency);
92  data_rec_header.set_window_size(depWindowSize);
93  dataTraceStream->write(data_rec_header);
94  // Register a callback to flush trace records and close the output streams.
98 }
99 
// Entry point that decides when the elastic trace probe listeners are
// registered; the log message below names it regProbeListeners().
// When startTraceInst is zero the probes are registered right away, otherwise
// regEtraceListenersEvent is scheduled on thread 0's committed-instruction
// event queue.
// NOTE(review): the signature line, the trailing arguments of inform() and
// schedule(), and the statement of the first branch are missing from this
// listing -- confirm against the full source.
100 void
102 {
103  inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
105  if (startTraceInst == 0) {
106  // If we want to start tracing from the start of the simulation,
107  // register all elastic trace probes now.
109  } else {
110  // Schedule an event to register all elastic trace probes when the
111  // specified no. of instructions are committed.
112  cpu->comInstEventQueue[(ThreadID)0]->schedule(&regEtraceListenersEvent,
114  }
115 }
116 
// Registers one listener per probe point (FetchRequest, Execute, ToCommit,
// Rename, SquashInRename, Squash and Commit), each bound to the matching
// ElasticTrace member function, and then marks registration as done.
// Presumably ElasticTrace::regEtraceListeners -- the signature line and the
// first line of every listener registration are missing from this listing.
117 void
119 {
// Registration must happen exactly once.
120  assert(!allProbesReg);
121  inform("@%llu: No. of instructions committed = %llu, registering elastic"
122  " probe listeners", curTick(), cpu->numSimulatedInsts());
123  // Create new listeners: provide the method to be called upon a notify()
124  // for each probe point.
126  "FetchRequest", &ElasticTrace::fetchReqTrace));
128  "Execute", &ElasticTrace::recordExecTick));
130  "ToCommit", &ElasticTrace::recordToCommTick));
132  "Rename", &ElasticTrace::updateRegDep));
134  "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
136  "Squash", &ElasticTrace::addSquashedInst));
138  "Commit", &ElasticTrace::addCommittedInst));
139  allProbesReg = true;
140 }
141 
// Listener for the "FetchRequest" probe point: writes one ProtoMessage::Packet
// describing the fetch request `req` to the instruction trace stream.
// The packet always carries MemCmd::ReadReq as its command and the request's
// physical address as its address; the virtual address is only printed in the
// debug message.
// Presumably ElasticTrace::fetchReqTrace -- the signature line is missing
// from this listing.
142 void
144 {
145 
146  DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
147  (MemCmd::ReadReq),
148  req->getPC(), req->getVaddr(), req->getPaddr(),
149  req->getFlags(), req->getSize(), curTick());
150 
151  // Create a protobuf message including the request fields necessary to
152  // recreate the request in the TraceCPU.
153  ProtoMessage::Packet inst_fetch_pkt;
154  inst_fetch_pkt.set_tick(curTick());
155  inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
156  inst_fetch_pkt.set_pc(req->getPC());
157  inst_fetch_pkt.set_flags(req->getFlags());
158  inst_fetch_pkt.set_addr(req->getPaddr());
159  inst_fetch_pkt.set_size(req->getSize());
160  // Write the message to the stream.
161  instTraceStream->write(inst_fetch_pkt);
162 }
163 
// Listener for the "Execute" probe point: stores the current tick as the
// execute tick of `dyn_inst` in its InstExecInfo, creating that object in
// tempStore when none exists yet, and updates the maxTempStoreSize stat.
// Instructions whose sequence number is not above lastClearedSeqNum are
// ignored.
// Presumably ElasticTrace::recordExecTick -- the signature line is missing
// from this listing.
164 void
166 {
167 
168  // In a corner case, a retired instruction is propagated backward to the
169  // IEW instruction queue to handle some side-channel information. But we
170  // must not process an instruction again. So we test the sequence number
171  // against the lastClearedSeqNum and skip adding the instruction for such
172  // corner cases.
173  if (dyn_inst->seqNum <= lastClearedSeqNum) {
 // NOTE(review): the backslash-newline inside the string literal below
 // splices the next line's leading whitespace into the message, and the
 // message has no trailing "\n" unlike the other DPRINTFR calls here.
174  DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
175  has already retired (mostly squashed)", dyn_inst->seqNum);
176  // Do nothing as program has proceeded and this inst has been
177  // propagated backwards to handle something.
178  return;
179  }
180 
181  DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
182  curTick());
183  // Either the execution info object will already exist if this
184  // instruction had a register dependency recorded in the rename probe
185  // listener before entering execute stage or it will not exist and will
186  // need to be created here.
187  InstExecInfo* exec_info_ptr;
188  auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
189  if (itr_exec_info != tempStore.end()) {
190  exec_info_ptr = itr_exec_info->second;
191  } else {
192  exec_info_ptr = new InstExecInfo;
193  tempStore[dyn_inst->seqNum] = exec_info_ptr;
194  }
195 
196  exec_info_ptr->executeTick = curTick();
 // Track the high-water mark of the temporary store.
197  maxTempStoreSize = std::max(tempStore.size(),
198  (std::size_t)maxTempStoreSize.value());
199 }
200 
// Listener for the "ToCommit" probe point: stores the current tick as the
// to-commit tick of `dyn_inst` in its InstExecInfo. Instructions without an
// entry in tempStore are skipped.
// Presumably ElasticTrace::recordToCommTick -- the signature line is missing
// from this listing.
201 void
203 {
204  // If tracing has just been enabled then the instruction at this stage of
205  // execution is far enough that we cannot gather info about its past like
206  // the tick it started execution. Simply return until we see an instruction
207  // that is found in the tempStore.
208  auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
209  if (itr_exec_info == tempStore.end()) {
210  DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
211  " skipping.\n", dyn_inst->seqNum);
212  return;
213  }
214 
215  DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
216  curTick());
217  InstExecInfo* exec_info_ptr = itr_exec_info->second;
218  exec_info_ptr->toCommitTick = curTick();
219 
220 }
221 
// Listener for the "Rename" probe point: creates the InstExecInfo for
// `dyn_inst`, records a dependency on the last writer of every source
// register found in physRegDepMap (if within depWindowSize), and then marks
// this instruction as the last writer of its destination registers.
// Presumably ElasticTrace::updateRegDep -- the signature line is missing
// from this listing.
222 void
224 {
225  // Get the sequence number of the instruction
226  InstSeqNum seq_num = dyn_inst->seqNum;
227 
228  assert(dyn_inst->seqNum > lastClearedSeqNum);
229 
230  // Since this is the first probe activated in the pipeline, create
231  // a new execution info object to track this instruction as it
232  // progresses through the pipeline.
233  InstExecInfo* exec_info_ptr = new InstExecInfo;
234  tempStore[seq_num] = exec_info_ptr;
235 
236  // Loop through the source registers and look up the dependency map. If
237  // the source register entry is found in the dependency map, add a
238  // dependency on the last writer.
 // NOTE(review): the register count is narrowed to int8_t -- assumes it
 // never exceeds 127; confirm.
239  int8_t max_regs = dyn_inst->numSrcRegs();
240  for (int src_idx = 0; src_idx < max_regs; src_idx++) {
241  // Get the physical register index of the i'th source register.
242  PhysRegIndex src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
243  DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg %i\n", seq_num,
244  src_reg);
245  auto itr_last_writer = physRegDepMap.find(src_reg);
246  if (itr_last_writer != physRegDepMap.end()) {
247  InstSeqNum last_writer = itr_last_writer->second;
248  // Additionally the dependency distance is kept less than the window
249  // size parameter to limit the memory allocation to nodes in the
250  // graph. If the window were tending to infinite we would have to
251  // load a large number of node objects during replay.
252  if (seq_num - last_writer < depWindowSize) {
253  // Record a physical register dependency.
254  exec_info_ptr->physRegDepSet.insert(last_writer);
255  }
256  }
257  }
258 
259  // Loop through the destination registers of this instruction and update
260  // the physical register dependency map for last writers to registers.
261  max_regs = dyn_inst->numDestRegs();
262  for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
263  // For data dependency tracking the register must be an int, float or
264  // CC register and not a Misc register.
265  TheISA::RegIndex dest_reg = dyn_inst->destRegIdx(dest_idx);
266  if (regIdxToClass(dest_reg) != MiscRegClass) {
267  // Get the physical register index of the i'th destination register.
 // dest_reg is reused here: it held the architectural index above and
 // holds the renamed index from this point on.
268  dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
269  if (dest_reg != TheISA::ZeroReg) {
270  DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg %i\n",
271  seq_num, dest_reg);
272  physRegDepMap[dest_reg] = seq_num;
273  }
274  }
275  }
 // Track the high-water mark of the dependency map.
276  maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
277  (std::size_t)maxPhysRegDepMapSize.value());
278 }
279 
// Listener for the "SquashInRename" probe point: removes the entry keyed by
// the register in `inst_reg_pair.second` from physRegDepMap, if present.
// Presumably ElasticTrace::removeRegDepMapEntry -- the signature line is
// missing from this listing.
280 void
282 {
283  DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
284  inst_reg_pair.second);
285  auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
286  if (itr_regdep_map != physRegDepMap.end())
287  physRegDepMap.erase(itr_regdep_map);
288 }
289 
// Listener for the "Squash" probe point: adds a squashed load to the
// dependency trace (with commit = false) when it has both its execute and
// to-commit ticks recorded, has a request and did not fault, and then clears
// the temporary store up to `head_inst`. Instructions without an entry in
// tempStore are ignored.
// Presumably ElasticTrace::addSquashedInst -- the signature line is missing
// from this listing.
290 void
292 {
293  // If the squashed instruction was squashed before being processed by
294  // execute stage then it will not be in the temporary store. In this case
295  // do nothing and return.
296  auto itr_exec_info = tempStore.find(head_inst->seqNum);
297  if (itr_exec_info == tempStore.end())
298  return;
299 
300  // If there is a squashed load for which a read request was
301  // sent before it got squashed then add it to the trace.
302  DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
303  head_inst->seqNum);
304  // Get pointer to the execution info object corresponding to the inst.
305  InstExecInfo* exec_info_ptr = itr_exec_info->second;
306  if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
307  exec_info_ptr->toCommitTick != MaxTick &&
308  head_inst->hasRequest() &&
309  head_inst->getFault() == NoFault) {
310  // Add record to depTrace with commit parameter as false.
311  addDepTraceRecord(head_inst, exec_info_ptr, false);
312  }
313  // As the information contained is no longer needed, remove the execution
314  // info object from the temporary store.
315  clearTempStoreUntil(head_inst);
316 }
317 
// Listener for the "Commit" probe point: adds the committed instruction to
// the dependency trace (with commit = true) unless it is a nop, faulted, is a
// memory reference without a request, or is predicated false, and then clears
// the temporary store up to `head_inst`.
// A non-nop instruction without an entry in tempStore makes the function
// return early, before the temporary store is cleared.
// Presumably ElasticTrace::addCommittedInst -- the signature line is missing
// from this listing.
318 void
320 {
321  DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
322  head_inst->seqNum);
323 
324  // Add the instruction to the depTrace.
325  if (!head_inst->isNop()) {
326 
327  // If tracing has just been enabled then the instruction at this stage
328  // of execution is far enough that we cannot gather info about its past
329  // like the tick it started execution. Simply return until we see an
330  // instruction that is found in the tempStore.
331  auto itr_temp_store = tempStore.find(head_inst->seqNum);
332  if (itr_temp_store == tempStore.end()) {
333  DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
334  "store, skipping.\n", head_inst->seqNum);
335  return;
336  }
337 
338  // Get pointer to the execution info object corresponding to the inst.
339  InstExecInfo* exec_info_ptr = itr_temp_store->second;
340  assert(exec_info_ptr->executeTick != MaxTick);
341  assert(exec_info_ptr->toCommitTick != MaxTick);
342 
343  // Check if the instruction had a fault, if it predicated false and
344  // thus previous register values were restored or if it was a
345  // load/store that did not have a request (e.g. when the size of the
346  // request is zero). In all these cases the instruction is set as
347  // executed and is picked up by the commit probe listener. But a
348  // request is not issued and registers are not written. So practically,
349  // skipping these should not hurt as execution would not stall on them.
350  // Alternatively, these could be included merely as a compute node in
351  // the graph. Removing these for now. If correlation accuracy needs to
352  // be improved in future these can be turned into comp nodes at the
353  // cost of bigger traces.
354  if (head_inst->getFault() != NoFault) {
355  DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
356  "skip adding it to the trace\n",
357  (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
358  head_inst->seqNum);
359  } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
360  DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
361  "skip adding it to the trace\n", head_inst->seqNum);
362  } else if (!head_inst->readPredicate()) {
363  DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
364  "skip adding it to the trace\n",
365  (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
366  head_inst->seqNum);
367  } else {
368  // Add record to depTrace with commit parameter as true.
369  addDepTraceRecord(head_inst, exec_info_ptr, true);
370  }
371  }
372  // As the information contained is no longer needed, remove the execution
373  // info object from the temporary store.
374  clearTempStoreUntil(head_inst);
375 }
376 
// Builds a TraceInfo record for `head_inst`, registers it in traceInfoMap and
// appends it to depTrace.
// The record gets the instruction's type, request fields and pc, the timing
// taken from `exec_info_ptr` plus the current tick as commit tick, its
// register dependencies, and (for stores, or when no other dependency was
// found) an order dependency. `commit` tells whether the instruction
// committed (true) or is a squashed load (false).
// The very first record is appended without any dependency processing.
// Presumably ElasticTrace::addDepTraceRecord -- the signature's first line
// and the write-out call inside the final if-block are missing from this
// listing.
377 void
379  InstExecInfo* exec_info_ptr, bool commit)
380 {
381  // Create a record to assign dynamic instruction related fields.
382  TraceInfo* new_record = new TraceInfo;
383  // Add to map for sequence number look up to retrieve the TraceInfo pointer
384  traceInfoMap[head_inst->seqNum] = new_record;
385 
386  // Assign fields from the instruction
387  new_record->instNum = head_inst->seqNum;
388  new_record->commit = commit;
389  new_record->type = head_inst->isLoad() ? Record::LOAD :
390  (head_inst->isStore() ? Record::STORE :
391  Record::COMP);
392 
393  // Assign fields for creating a request in case of a load/store
394  new_record->reqFlags = head_inst->memReqFlags;
395  new_record->virtAddr = head_inst->effAddr;
396  new_record->asid = head_inst->asid;
397  new_record->physAddr = head_inst->physEffAddrLow;
398  // Currently the tracing does not support split requests.
399  new_record->size = head_inst->effSize;
400  new_record->pc = head_inst->instAddr();
401 
402  // Assign the timing information stored in the execution info object
403  new_record->executeTick = exec_info_ptr->executeTick;
404  new_record->toCommitTick = exec_info_ptr->toCommitTick;
405  new_record->commitTick = curTick();
406 
407  // Assign initial values for number of dependents and computational delay
408  new_record->numDepts = 0;
409  new_record->compDelay = -1;
410 
411  // The physical register dependency set of the first instruction is
412  // empty. Since there are no records in the depTrace at this point, the
413  // case of adding an ROB dependency by using a reverse iterator is not
414  // applicable. Thus, populate the fields of the record corresponding to the
415  // first instruction and return.
416  if (depTrace.empty()) {
417  // Store the record in depTrace.
418  depTrace.push_back(new_record);
419  DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
420  new_record->instNum);
421  return;
422  }
423 
424  // Clear register dependencies for squashed loads as they may be dependent
425  // on squashed instructions and we do not add those to the trace.
426  if (head_inst->isLoad() && !commit) {
427  (exec_info_ptr->physRegDepSet).clear();
428  }
429 
430  // Assign the register dependencies stored in the execution info object
431  std::set<InstSeqNum>::const_iterator dep_set_it;
432  for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
433  dep_set_it != (exec_info_ptr->physRegDepSet).end();
434  ++dep_set_it) {
435  auto trace_info_itr = traceInfoMap.find(*dep_set_it);
436  if (trace_info_itr != traceInfoMap.end()) {
437  // The register dependency is valid. Assign it and calculate
438  // computational delay
439  new_record->physRegDepList.push_back(*dep_set_it);
440  DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
441  "%lli\n", new_record->instNum, *dep_set_it);
442  TraceInfo* reg_dep = trace_info_itr->second;
443  reg_dep->numDepts++;
444  compDelayPhysRegDep(reg_dep, new_record);
445  ++numRegDep;
446  } else {
447  // The instruction that this has a register dependency on was
448  // not added to the trace because of one of the following
449  // 1. it was an instruction that had a fault
450  // 2. it was an instruction that was predicated false and
451  // previous register values were restored
452  // 3. it was load/store that did not have a request (e.g. when
453  // the size of the request is zero but this may not be a fault)
454  // In all these cases the instruction is set as executed and is
455  // picked up by the commit probe listener. But a request is not
456  // issued and registers are not written to in these cases.
457  DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
458  "%lli is skipped\n",new_record->instNum, *dep_set_it);
459  }
460  }
461 
462  // Check for and assign an ROB dependency in addition to register
463  // dependency before adding the record to the trace.
464  // As stores have to commit in order a store is dependent on the last
465  // committed load/store. This is recorded in the ROB dependency.
466  if (head_inst->isStore()) {
467  // Look up store-after-store order dependency
468  updateCommitOrderDep(new_record, false);
469  // Look up store-after-load order dependency
470  updateCommitOrderDep(new_record, true);
471  }
472 
473  // In case a node is dependency-free or its dependency got discarded
474  // because it was outside the window, it is marked ready in the ROB at the
475  // time of issue. A request is sent as soon as possible. To model this, a
476  // node is assigned an issue order dependency on a committed instruction
477  // that completed earlier than it. This is done to avoid the problem of
478  // determining the issue times of such dependency-free nodes during replay
479  // which could lead to too much parallelism, thinking conservatively.
480  if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
481  updateIssueOrderDep(new_record);
482  }
483 
484  // Store the record in depTrace.
485  depTrace.push_back(new_record);
486  DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
487  (commit ? "committed" : "squashed"), new_record->instNum);
488 
489  // To process the number of records specified by depWindowSize in the
490  // forward direction, the depTrace must have twice as many records
491  // to check for dependencies.
492  if (depTrace.size() == 2 * depWindowSize) {
493 
494  DPRINTF(ElasticTrace, "Writing out trace...\n");
495 
496  // Write out the records which have been processed to the trace
497  // and remove them from the depTrace.
499 
500  // After the first window, writeDepTrace() must check for valid
501  // compDelay.
502  firstWin = false;
503  }
504 }
505 
// Searches depTrace backwards, over at most depWindowSize records, for the
// most recent load that completed (find_load_not_store == true) or store that
// committed (find_load_not_store == false) no later than the current tick,
// and assigns it as an ROB dependency of the store `new_record`.
// Returns as soon as one dependency has been assigned.
// Presumably ElasticTrace::updateCommitOrderDep -- the signature's first line
// and one statement before each `return` are missing from this listing.
506 void
508  bool find_load_not_store)
509 {
510  assert(new_record->isStore());
511  // Iterate in reverse direction to search for the last committed
512  // load/store that completed earlier than the new record
513  depTraceRevItr from_itr(depTrace.end());
514  depTraceRevItr until_itr(depTrace.begin());
 // Dereferencing here requires depTrace to be non-empty.
515  TraceInfo* past_record = *from_itr;
516  uint32_t num_go_back = 0;
517 
518  // The execution time of this store is when it is sent, that is committed
519  Tick execute_tick = curTick();
520  // Search for store-after-load or store-after-store order dependency
521  while (num_go_back < depWindowSize && from_itr != until_itr) {
522  if (find_load_not_store) {
523  // Check if previous inst is a load completed earlier by comparing
524  // with execute tick
525  if (hasLoadCompleted(past_record, execute_tick)) {
526  // Assign rob dependency and calculate the computational delay
527  assignRobDep(past_record, new_record);
529  return;
530  }
531  } else {
532  // Check if previous inst is a store sent earlier by comparing with
533  // execute tick
534  if (hasStoreCommitted(past_record, execute_tick)) {
535  // Assign rob dependency and calculate the computational delay
536  assignRobDep(past_record, new_record);
538  return;
539  }
540  }
 // NOTE(review): after the increment from_itr may equal until_itr, yet it
 // is dereferenced on the next line before the loop condition re-checks
 // it -- consider advancing past_record only while from_itr != until_itr.
541  ++from_itr;
542  past_record = *from_itr;
543  ++num_go_back;
544  }
545 }
546 
// Searches depTrace backwards, over at most depWindowSize records, for the
// most recent record that was sent or completed no later than the execution
// tick of `new_record`, and assigns it as an ROB dependency of `new_record`.
// The execution tick is executeTick for a load, the current tick for a store
// and toCommitTick otherwise.
// Presumably ElasticTrace::updateIssueOrderDep -- the signature line and one
// statement in each branch of the type check are missing from this listing.
547 void
549 {
550  // Iterate in reverse direction to search for the last committed
551  // record that completed earlier than the new record
552  depTraceRevItr from_itr(depTrace.end());
553  depTraceRevItr until_itr(depTrace.begin());
 // Dereferencing here requires depTrace to be non-empty.
554  TraceInfo* past_record = *from_itr;
555 
556  uint32_t num_go_back = 0;
557  Tick execute_tick = 0;
558 
559  if (new_record->isLoad()) {
560  // The execution time of a load is when a request is sent
561  execute_tick = new_record->executeTick;
563  } else if (new_record->isStore()) {
564  // The execution time of a store is when it is sent, i.e. committed
565  execute_tick = curTick();
567  } else {
568  // The execution time of a non load/store is when it completes
569  execute_tick = new_record->toCommitTick;
571  }
572 
573  // We search if this record has an issue order dependency on a past record.
574  // Once we find it, we update both the new record and the record it depends
575  // on and return.
576  while (num_go_back < depWindowSize && from_itr != until_itr) {
577  // Check if a previous inst is a load sent earlier, or a store sent
578  // earlier, or a comp inst completed earlier by comparing with execute
579  // tick
580  if (hasLoadBeenSent(past_record, execute_tick) ||
581  hasStoreCommitted(past_record, execute_tick) ||
582  hasCompCompleted(past_record, execute_tick)) {
583  // Assign rob dependency and calculate the computational delay
584  assignRobDep(past_record, new_record);
585  return;
586  }
 // NOTE(review): after the increment from_itr may equal until_itr, yet it
 // is dereferenced on the next line before the loop condition re-checks
 // it -- consider advancing past_record only while from_itr != until_itr.
587  ++from_itr;
588  past_record = *from_itr;
589  ++num_go_back;
590  }
591 }
592 
593 void
594 ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
595  DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
596  new_record->typeToStr(), new_record->instNum,
597  past_record->instNum);
598  // Add dependency on past record
599  new_record->robDepList.push_back(past_record->instNum);
600  // Update new_record's compute delay with respect to the past record
601  compDelayRob(past_record, new_record);
602  // Increment number of dependents of the past record
603  ++(past_record->numDepts);
604  // Update stat to log max number of dependents
605  maxNumDependents = std::max(past_record->numDepts,
606  (uint32_t)maxNumDependents.value());
607 }
608 
// Returns true when `past_record` is a store whose commit tick is not later
// than `execute_tick`.
// Presumably ElasticTrace::hasStoreCommitted -- the signature's first line is
// missing from this listing.
609 bool
611  Tick execute_tick) const
612 {
613  return (past_record->isStore() && past_record->commitTick <= execute_tick);
614 }
615 
// Returns true when `past_record` is a committed load whose to-commit tick is
// not later than `execute_tick`.
// Presumably ElasticTrace::hasLoadCompleted -- the signature's first line is
// missing from this listing.
616 bool
618  Tick execute_tick) const
619 {
620  return(past_record->isLoad() && past_record->commit &&
621  past_record->toCommitTick <= execute_tick);
622 }
623 
// Returns true when `past_record` is a committed load whose execute tick is
// not later than `execute_tick`.
// Presumably ElasticTrace::hasLoadBeenSent -- the signature's first line is
// missing from this listing.
624 bool
626  Tick execute_tick) const
627 {
628  // Check if previous inst is a load sent earlier than this
629  return (past_record->isLoad() && past_record->commit &&
630  past_record->executeTick <= execute_tick);
631 }
632 
// Returns true when `past_record` is a comp (non load/store) record whose
// to-commit tick is not later than `execute_tick`.
// Presumably ElasticTrace::hasCompCompleted -- the signature's first line is
// missing from this listing.
633 bool
635  Tick execute_tick) const
636 {
637  return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
638 }
639 
// Deletes and erases every InstExecInfo in tempStore whose sequence number
// lies above lastClearedSeqNum and at or below that of `head_inst`, then sets
// lastClearedSeqNum to the sequence number of `head_inst`.
// Every sequence number in that range is looked up individually; numbers
// without an entry are simply skipped.
// Presumably ElasticTrace::clearTempStoreUntil -- the signature line is
// missing from this listing.
640 void
642 {
643  // Clear from temp store starting with the execution info object
644  // corresponding to the head_inst and continue clearing by decrementing the
645  // sequence number until the last cleared sequence number.
646  InstSeqNum temp_sn = (head_inst->seqNum);
647  while (temp_sn > lastClearedSeqNum) {
648  auto itr_exec_info = tempStore.find(temp_sn);
649  if (itr_exec_info != tempStore.end()) {
650  InstExecInfo* exec_info_ptr = itr_exec_info->second;
651  // Free allocated memory for the info object
652  delete exec_info_ptr;
653  // Remove entry from temporary store
654  tempStore.erase(itr_exec_info);
655  }
656  temp_sn--;
657  }
658  // Update the last cleared sequence number to that of the head_inst
659  lastClearedSeqNum = head_inst->seqNum;
660 }
661 
// Computes the computational delay of `new_record` with respect to its ROB
// dependency `past_record` and keeps the smaller of that value and any delay
// already stored in the new record (-1 meaning none stored yet).
// The completion tick of the past record is: for a load, toCommitTick when
// the new record is a store and executeTick otherwise; for a store,
// commitTick; for a comp record, toCommitTick.
// Presumably ElasticTrace::compDelayRob -- the signature line is missing from
// this listing.
662 void
664 {
665  // The computation delay is the delay between the completion tick of the
666  // inst. pointed to by past_record and the execution tick of its dependent
667  // inst. pointed to by new_record.
668  int64_t comp_delay = -1;
669  Tick execution_tick = 0, completion_tick = 0;
670 
671  DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
672  new_record->instNum, past_record->instNum);
673 
674  // Get the tick when the node is executed as per the modelling of
675  // computation delay
676  execution_tick = new_record->getExecuteTick();
677 
 // If none of the branches below matches, completion_tick stays 0.
678  if (past_record->isLoad()) {
679  if (new_record->isStore()) {
680  completion_tick = past_record->toCommitTick;
681  } else {
682  completion_tick = past_record->executeTick;
683  }
684  } else if (past_record->isStore()) {
685  completion_tick = past_record->commitTick;
686  } else if (past_record->isComp()){
687  completion_tick = past_record->toCommitTick;
688  }
689  assert(execution_tick >= completion_tick);
690  comp_delay = execution_tick - completion_tick;
691 
692  DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
693  execution_tick, completion_tick, comp_delay);
694 
695  // Assign the computational delay with respect to the dependency which
696  // completes the latest, i.e. the one yielding the smallest delay.
697  if (new_record->compDelay == -1)
698  new_record->compDelay = comp_delay;
699  else
700  new_record->compDelay = std::min(comp_delay, new_record->compDelay);
701  DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
702  new_record->compDelay);
703 }
704 
// Computes the computational delay of `new_record` with respect to its
// register dependency `past_record` and keeps the smaller of that value and
// any delay already stored in the new record (-1 meaning none stored yet).
// The completion tick of the past record is commitTick for a store and
// toCommitTick otherwise.
// Presumably ElasticTrace::compDelayPhysRegDep -- the signature's first line
// is missing from this listing.
705 void
707  TraceInfo* new_record)
708 {
709  // The computation delay is the delay between the completion tick of the
710  // inst. pointed to by past_record and the execution tick of its dependent
711  // inst. pointed to by new_record.
712  int64_t comp_delay = -1;
713  Tick execution_tick = 0, completion_tick = 0;
714 
715  DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
716  " %lli.\n", new_record->instNum, past_record->instNum);
717 
718  // Get the tick when the node is executed as per the modelling of
719  // computation delay
720  execution_tick = new_record->getExecuteTick();
721 
722  // When there is a physical register dependency on an instruction, the
723  // completion tick of that instruction is when it wrote to the register,
724  // that is toCommitTick. In the case of a store updating a destination
725  // register, this is approximated to commitTick instead
726  if (past_record->isStore()) {
727  completion_tick = past_record->commitTick;
728  } else {
729  completion_tick = past_record->toCommitTick;
730  }
731  assert(execution_tick >= completion_tick);
732  comp_delay = execution_tick - completion_tick;
733  DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
734  execution_tick, completion_tick, comp_delay);
735 
736  // Assign the computational delay with respect to the dependency which
737  // completes the latest, i.e. the one yielding the smallest delay.
738  if (new_record->compDelay == -1)
739  new_record->compDelay = comp_delay;
740  else
741  new_record->compDelay = std::min(comp_delay, new_record->compDelay);
742  DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
743  new_record->compDelay);
744 }
745 
// Returns the tick at which this record counts as executed when modelling
// computational delay: executeTick for a load, commitTick for a store and
// toCommitTick for anything else.
// Presumably a TraceInfo member (it is called as new_record->getExecuteTick()
// elsewhere in this file) -- the signature line is missing from this listing.
746 Tick
748 {
749  if (isLoad()) {
750  // Execution tick for a load instruction is when the request was sent,
751  // that is executeTick.
752  return executeTick;
753  } else if (isStore()) {
754  // Execution tick for a store instruction is when the request was sent,
755  // that is commitTick.
756  return commitTick;
757  } else {
758  // Execution tick for a non load/store instruction is when the register
759  // value was written to, that is toCommitTick.
760  return toCommitTick;
761  }
762 }
763 
764 void
765 ElasticTrace::writeDepTrace(uint32_t num_to_write)
766 {
767  // Write the trace with fields as follows:
768  // Instruction sequence number
769  // If instruction was a load
770  // If instruction was a store
771  // If instruction has addr
772  // If instruction has size
773  // If instruction has flags
774  // List of order dependencies - optional, repeated
775  // Computational delay with respect to last completed dependency
776  // List of physical register RAW dependencies - optional, repeated
777  // Weight of a node equal to no. of filtered nodes before it - optional
778  uint16_t num_filtered_nodes = 0;
779  depTraceItr dep_trace_itr(depTrace.begin());
780  depTraceItr dep_trace_itr_start = dep_trace_itr;
781  while (num_to_write > 0) {
782  TraceInfo* temp_ptr = *dep_trace_itr;
783  assert(temp_ptr->type != Record::INVALID);
784  // If no node depends on a comp node then there is no reason to
785  // track the comp node in the dependency graph. We filter out such
786  // nodes but count them and add a weight field to the subsequent node
787  // that we do include in the trace.
788  if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
789  DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
790  "is as follows:\n", temp_ptr->instNum);
791  if (temp_ptr->isLoad() || temp_ptr->isStore()) {
792  DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
793  DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
794  "size %i, flags %i\n", temp_ptr->physAddr,
795  temp_ptr->size, temp_ptr->reqFlags);
796  } else {
797  DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
798  }
799  if (firstWin && temp_ptr->compDelay == -1) {
800  if (temp_ptr->isLoad()) {
801  temp_ptr->compDelay = temp_ptr->executeTick;
802  } else if (temp_ptr->isStore()) {
803  temp_ptr->compDelay = temp_ptr->commitTick;
804  } else {
805  temp_ptr->compDelay = temp_ptr->toCommitTick;
806  }
807  }
808  assert(temp_ptr->compDelay != -1);
809  DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
810  temp_ptr->compDelay);
811 
812  // Create a protobuf message for the dependency record
813  ProtoMessage::InstDepRecord dep_pkt;
814  dep_pkt.set_seq_num(temp_ptr->instNum);
815  dep_pkt.set_type(temp_ptr->type);
816  dep_pkt.set_pc(temp_ptr->pc);
817  if (temp_ptr->isLoad() || temp_ptr->isStore()) {
818  dep_pkt.set_flags(temp_ptr->reqFlags);
819  dep_pkt.set_p_addr(temp_ptr->physAddr);
820  // If tracing of virtual addresses is enabled, set the optional
821  // field for it
822  if (traceVirtAddr) {
823  dep_pkt.set_v_addr(temp_ptr->virtAddr);
824  dep_pkt.set_asid(temp_ptr->asid);
825  }
826  dep_pkt.set_size(temp_ptr->size);
827  }
828  dep_pkt.set_comp_delay(temp_ptr->compDelay);
829  if (temp_ptr->robDepList.empty()) {
830  DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
831  }
832  while (!temp_ptr->robDepList.empty()) {
833  DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
834  temp_ptr->robDepList.front());
835  dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
836  temp_ptr->robDepList.pop_front();
837  }
838  if (temp_ptr->physRegDepList.empty()) {
839  DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
840  }
841  while (!temp_ptr->physRegDepList.empty()) {
842  DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
843  temp_ptr->physRegDepList.front());
844  dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
845  temp_ptr->physRegDepList.pop_front();
846  }
847  if (num_filtered_nodes != 0) {
848  // Set the weight of this node as the no. of filtered nodes
849  // between this node and the last node that we wrote to output
850  // stream. The weight will be used during replay to model ROB
851  // occupancy of filtered nodes.
852  dep_pkt.set_weight(num_filtered_nodes);
853  num_filtered_nodes = 0;
854  }
855  // Write the message to the protobuf output stream
856  dataTraceStream->write(dep_pkt);
857  } else {
858  // Don't write the node to the trace but note that we have filtered
859  // out a node.
861  ++num_filtered_nodes;
862  }
863  dep_trace_itr++;
864  traceInfoMap.erase(temp_ptr->instNum);
865  delete temp_ptr;
866  num_to_write--;
867  }
868  depTrace.erase(dep_trace_itr_start, dep_trace_itr);
869 }
870 
// Statistics registration: each stat gets a name prefixed with name() and a
// human-readable description.
// NOTE(review): this extract is missing the signature line and the stat
// identifier in front of every .name(...) chain except numRegDep (the
// embedded numbering skips 872-873, 881, 887, 893, 899, 905, 910, 915, 920).
// Restore them from the upstream file before building.
// NOTE(review): the maxNumDependents description reads "number or
// dependents"; it probably means "number of dependents". It is a runtime
// string, so it is left untouched here.
871 void
874 
875  using namespace Stats;
876  numRegDep
877  .name(name() + ".numRegDep")
878  .desc("Number of register dependencies recorded during tracing")
879  ;
880 
882  .name(name() + ".numOrderDepStores")
883  .desc("Number of commit order (rob) dependencies for a store recorded"
884  " on a past load/store during tracing")
885  ;
886 
888  .name(name() + ".numIssueOrderDepLoads")
889  .desc("Number of loads that got assigned issue order dependency"
890  " because they were dependency-free")
891  ;
892 
894  .name(name() + ".numIssueOrderDepStores")
895  .desc("Number of stores that got assigned issue order dependency"
896  " because they were dependency-free")
897  ;
898 
900  .name(name() + ".numIssueOrderDepOther")
901  .desc("Number of non load/store insts that got assigned issue order"
902  " dependency because they were dependency-free")
903  ;
904 
906  .name(name() + ".numFilteredNodes")
907  .desc("No. of nodes filtered out before writing the output trace")
908  ;
909 
911  .name(name() + ".maxNumDependents")
912  .desc("Maximum number or dependents on any instruction")
913  ;
914 
916  .name(name() + ".maxTempStoreSize")
917  .desc("Maximum size of the temporary store during the run")
918  ;
919 
921  .name(name() + ".maxPhysRegDepMapSize")
922  .desc("Maximum size of register dependency map")
923  ;
924 }
925 
// Returns the string that Record::RecordType_Name() yields for this record's
// type member.
// NOTE(review): the signature line (927) is missing from this extract;
// presumably this is the record's typeToStr() const accessor - confirm
// against the upstream file.
926 const std::string&
928 {
929  return Record::RecordType_Name(type);
930 }
931 
// Returns the listener's name by forwarding to ProbeListenerObject::name().
// NOTE(review): the signature line (933) is missing from this extract;
// presumably ElasticTrace::name() const - confirm against the upstream file.
932 const std::string
934 {
935  return ProbeListenerObject::name();
936 }
937 
// Writes out every record still held in depTrace, then deletes both trace
// stream objects (dataTraceStream and instTraceStream).
// NOTE(review): the signature line (939) is missing from this extract;
// presumably ElasticTrace::flushTraces() - confirm against the upstream file.
938 void
940 {
941  // Write to trace all records in the depTrace.
942  writeDepTrace(depTrace.size());
943  // Delete the stream objects
944  delete dataTraceStream;
945  delete instTraceStream;
946 }
947 
// Builds an ElasticTrace from this parameter object and returns the
// heap-allocated instance.
// NOTE(review): the return-type line (948) is missing from this extract.
949 ElasticTraceParams::create()
950 {
951  return new ElasticTrace(this);
952 }
int64_t compDelay
Computational delay after the last dependent inst.
Stats::Scalar numIssueOrderDepOther
Number of non load/store insts that got assigned an issue order dependency because they were dependen...
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:677
void compDelayPhysRegDep(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has a Phys...
#define DPRINTF(x,...)
Definition: trace.hh:212
void flushTraces()
Process any outstanding trace records, flush them out to the protobuf output streams and delete the s...
void regProbeListeners()
Register the probe listeners, that is, the methods called on a probe point notify() call...
ElasticTrace(const ElasticTraceParams *params)
Constructor.
OutputDirectory simout
Definition: output.cc:65
A ProtoOutputStream wraps a coded stream, potentially with compression, based on looking at the file ...
Definition: protoio.hh:92
bool isComp() const
Is the record a comp node.
void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
When an instruction gets squashed the destination register mapped to it is freed up in the rename sta...
FullO3CPU< O3CPUImpl > * cpu
Pointer to the O3CPU that is this listener's parent a.k.a.
void regEtraceListeners()
Register all listeners.
decltype(nullptr) constexpr NoFault
Definition: types.hh:189
void write(const google::protobuf::Message &msg)
Write a message to the stream, prepending it with the message size.
Definition: protoio.cc:84
Generic callback class.
Definition: callback.hh:41
The elastic trace is a type of probe listener and listens to probe points in multiple stages of the O...
STL pair class.
Definition: stl.hh:61
void assignRobDep(TraceInfo *past_record, TraceInfo *new_record)
The new_record has an order dependency on a past_record, thus update the new record's Rob dependency ...
std::unordered_map< InstSeqNum, InstExecInfo * > tempStore
Temporary store of InstExecInfo objects.
Control (misc) register.
Definition: reg_class.hh:45
bool isStore() const
Is the record a store.
void addDepTraceRecord(const DynInstPtr &head_inst, InstExecInfo *exec_info_ptr, bool commit)
Add a record to the dependency trace depTrace which is a sequential container.
bool allProbesReg
Whether the elastic trace listener has been registered for all probes.
uint32_t depWindowSize
The maximum distance for a dependency and is set by a top level parameter.
std::vector< TraceInfo * >::iterator depTraceItr
Typedef of iterator to the instruction dependency trace.
Addr getPC() const
Accessor function for pc.
Definition: request.hh:715
std::vector< TraceInfo * > depTrace
The instruction dependency trace containing TraceInfo objects.
virtual void regStats()
Register statistics for this object.
Definition: sim_object.cc:105
void addSquashedInst(const DynInstPtr &head_inst)
Add an instruction that is at the head of the ROB and is squashed only if it is a load and a request ...
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition: core.cc:47
If you want a reference counting pointer to a mutable object, create it like this: ...
Definition: refcnt.hh:106
EventWrapper< ElasticTrace,&ElasticTrace::regEtraceListeners > regEtraceListenersEvent
Event to trigger registering this listener for all probe points.
void updateRegDep(const DynInstPtr &dyn_inst)
Record a Read After Write physical register dependency if there has been a write to the source regist...
void updateCommitOrderDep(TraceInfo *new_record, bool find_load_not_store)
Reverse iterate through the graph, search for a store-after-store or store-after-load dependency and ...
Stats::Scalar numIssueOrderDepStores
Number of store insts that got assigned an issue order dependency because they were dependency-free...
void recordExecTick(const DynInstPtr &dyn_inst)
Populate the execute timestamp field in an InstExecInfo object for an instruction in flight...
Stats::Scalar numOrderDepStores
Number of stores that got assigned a commit order dependency on a past load/store.
const Tick MaxTick
Definition: types.hh:65
Tick curTick()
The current simulated tick.
Definition: core.hh:47
uint8_t RegIndex
Definition: registers.hh:46
void writeDepTrace(uint32_t num_to_write)
Write out given number of records to the trace starting with the first record in depTrace and iterati...
Stats::Scalar numIssueOrderDepLoads
Number of load insts that got assigned an issue order dependency because they were dependency-free...
Stats::Scalar maxPhysRegDepMapSize
Maximum size of the map that holds the last writer to a physical register.
void recordToCommTick(const DynInstPtr &dyn_inst)
Populate the timestamp field in an InstExecInfo object for an instruction in flight when it is execut...
uint64_t Tick
Tick count type.
Definition: types.hh:63
ProtoOutputStream * dataTraceStream
Protobuf output stream for data dependency trace.
const RegIndex ZeroReg
Definition: registers.hh:77
This class is a minimal wrapper around SimObject.
Definition: probe.hh:100
std::unordered_map< InstSeqNum, TraceInfo * > traceInfoMap
Map where the instruction sequence number is mapped to the pointer to the TraceInfo object...
Addr getPaddr() const
Definition: request.hh:519
bool hasLoadCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load that completed earlier than the execute tick.
void registerExitCallback(Callback *callback)
Register an exit callback.
Definition: core.cc:116
uint64_t InstSeqNum
Definition: inst_seq.hh:40
std::list< InstSeqNum > physRegDepList
Tick getExecuteTick() const
Get the execute tick of the instruction.
Tick toCommitTick
Timestamp when instruction execution is completed in execute stage and instruction is marked as ready...
void regStats()
Register statistics for the elastic trace.
void updateIssueOrderDep(TraceInfo *new_record)
Reverse iterate through the graph, search for an issue order dependency for a new node and update the...
std::string resolve(const std::string &name) const
Returns relative file names prepended with name of this directory.
Definition: output.cc:200
Request::FlagsType reqFlags
bool hasStoreCommitted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a store sent earlier than the execute tick.
const std::string & typeToStr() const
Return string specifying the type of the node.
std::vector< ProbeListener * > listeners
Definition: probe.hh:104
Flags getFlags()
Accessor for flags.
Definition: request.hh:584
ProtoOutputStream * instTraceStream
Protobuf output stream for instruction fetch trace.
bool hasCompCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a comp node that completed earlier than the execute tick. ...
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:254
void clearTempStoreUntil(const DynInstPtr head_inst)
Clear entries in the temporary store of execution info objects to free allocated memory until the pre...
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:171
bool hasLoadBeenSent(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load sent earlier than the execute tick.
type
Definition: misc.hh:728
void fetchReqTrace(const RequestPtr &req)
Take the fields of the request class object that are relevant to create an instruction fetch request...
virtual const std::string name() const
Definition: sim_object.hh:117
bool isLoad() const
Is the record a load.
Stats::Scalar maxTempStoreSize
Maximum size of the temporary store mostly useful as a check that it is not growing.
Declaration of the Packet class.
std::set< InstSeqNum > physRegDepSet
Set of instruction sequence numbers that this instruction depends on due to Read After Write data dep...
std::list< InstSeqNum > robDepList
Addr getVaddr() const
Definition: request.hh:616
Stats::Scalar maxNumDependents
Maximum number of dependents on any instruction.
short int PhysRegIndex
Definition: comm.hh:57
bool firstWin
Used for checking the first window for processing and writing of dependency trace.
const bool traceVirtAddr
Whether to trace virtual addresses for memory requests.
Tick executeTick
Timestamp when instruction was first processed by execute stage.
void schedule(Event &event, Tick when)
Definition: eventq.hh:728
std::reverse_iterator< depTraceItr > depTraceRevItr
Typedef of the reverse iterator to the instruction dependency trace.
std::unordered_map< PhysRegIndex, InstSeqNum > physRegDepMap
Map for recording the producer of a physical register to check Read After Write dependencies.
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:287
const InstSeqNum startTraceInst
Number of instructions after which to enable tracing.
RegClass regIdxToClass(TheISA::RegIndex reg_idx, TheISA::RegIndex *rel_reg_idx=NULL)
Map a 'unified' architectural register index to its register class.
Definition: reg_class.hh:66
InstSeqNum lastClearedSeqNum
The last cleared instruction sequence number used to free up the memory allocated in the temporary st...
Helper template class to turn a simple class member function into a callback.
Definition: callback.hh:64
void addCommittedInst(const DynInstPtr &head_inst)
Add an instruction that is at the head of the ROB and is committed.
Stats::Scalar numRegDep
Number of register dependencies recorded during tracing.
fatal_if(p->js_features.size() > 16,"Too many job slot feature registers specified (%i)\n", p->js_features.size())
RecordType type
The type of trace record for the instruction node.
unsigned getSize() const
Definition: request.hh:552
ProbeListenerArg generates a listener for the class of Arg and the class type T which is the class co...
Definition: probe.hh:217
#define inform(...)
Definition: misc.hh:221
Stats::Scalar numFilteredNodes
Number of filtered nodes.
const std::string name() const
Returns the name of the trace probe listener.
void compDelayRob(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has an ROB...
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:102
#define DPRINTFR(...)
Definition: trace.hh:214
static int numSimulatedInsts()
Definition: cpu_dummy.hh:48

Generated on Fri Jun 9 2017 13:03:43 for gem5 by doxygen 1.8.6