/******************************************************************************
** FILE: dynamic.fs
** Dynamic execution engine simulator.
*/
#include "param.h"
#include "instq.fs"
#include "sparc_v9_decode.fs"
#include "execute.fs"
#include "rename_init.fs"
#include "cache.fs"
extern num_insts_retired : unsigned[64];
val num_insts_retired = 0?cvt(unsigned[64]);
extern CPU_cycle : unsigned[64];
val CPU_cycle = 0?cvt(unsigned[64]);
//
// Type of data in each fuQ entry.
type FU_Data = struct { ftype : uchar, ftime : uchar };
fun init_taken(fu) { return fu.ftype > 0x07; }
fun init_ftype(fu) { return fu.ftype & 0x7f; }
//
// Simulator initialization
type InitType = (stream,stream,cwp_t,cwp_t,cwp_t,FU_Data queue,stream queue);
fun initialize() : InitType
{
rmap_init();
cache_init();
branch_init();
regs_init(system?start_sp);
return (system?start_pc,system?start_pc+4,0b0?ext(5),
(NWINDOWS-2)?cvt(cwp_t),0b0?ext(5),
queue{},queue{});
}
val init : InitType = initialize();
//
// Sumilator main() function
fun main(var pc1, var npc1, var cwp1,
var cansave1, var canrestore1,
var fuQ : FU_Data queue,
var jmpQ : stream queue)
{
PC = pc1; // PC of oldest instruction
nPC = npc1; // PC of second oldest instruction
CWP0 = cwp1; // CWP before oldest instruction
CWP = CWP0; // CWP after newest instruction
CANSAVE = cansave1; // CANSAVE after newest instruction
CANRESTORE = canrestore1; // CANRESTORE after newest instruction
instq?clear(); // clear instq; make it rt-stat
// Initialize these here to make them (rt-)static for as long as possible
nPC2 = 0?cvt(stream);
is_ccti = false;
is_jmpl = false;
is_call = false;
rollback = false;
fu_num_allocated = array(FU_END1) { 0 };
is_spec = false;
no_loads = true;
unk_load_addr = false;
unk_store_addr = false;
new_inst = struct { pc = 0?cvt(stream), npc = 0?cvt(stream),
srcq = queue{}, destq = queue{},
op = 0?cvt(ushort), ftype = 0?cvt(uchar),
ftime = 0?cvt(uchar), taken = false,
delta = 0?cvt(char), trap = false,
done = false, align = 0?cvt(uchar) };
//
// Flags to control execution
is_trap = false; // flag if trapping instruction in pipe
in_init = fuQ?length()>0; // flag to re-fetch instructions
is_taken = false;
val done = false; // flag to trigger exit from main()
// Flag to help find and label instructions
// that cause main to exit when they retire.
val finish = false;
///////////////////////////////////////////////////////////////////////////
// Loop until we want to generate a new index.
while(!done) {
// Maximum number of instructions to fetch & decode
val num_to_fetch = FETCH_WIDTH;
if(is_trap) num_to_fetch = 0;
else if(in_init) num_to_fetch = num_to_fetch + fuQ?length();
// Allow at most OOLIMIT instructions into the out-of-order queue
if(num_to_fetch > (OOLIMIT - instq?length()))
num_to_fetch = OOLIMIT - instq?length();
///////////////////////////////////////////////////////////////////////
// Loop to fetch instructions. At the start of any call to main,
// this loop re-fetches & decodes all instructions in flight.
// On subsequent iterations this loop fetches new instructions.
while(num_to_fetch > 0) {
if(instq?length() > 1) {
switch(instq[-2].pc) {
case (pat jmpl || retrn):
// We cannot fetch instructions beyond the delay slot of
// an indirect jump until the jump address is determined
if(instq[-2].ftime > 0?cvt(uchar)) break; // break while
var inst = instq[-1]; // instruction in delay slot
// When ready, update PC to jump target
val inum = instq?length() - 2;
PC = inst.npc;
// If there is no taken branch in the delay slot, then
// update nPC to the instruction following the jmp target
if(!inst.taken) nPC = PC + 4;
default: ;
}
}
nPC2 = nPC + 4; // predict the next nPC value
//
// Fetch next instruction
num_to_fetch = num_to_fetch - 1;
init_inst(new_inst,PC,nPC);
if(!in_init) rmap_fetch();
else {
// If in_init => update branch take flag before deocoding
is_taken = init_taken(fuQ[+0]);
}
//
// Decode instruction; Use instruction semanics.
is_ccti = false;
is_jmpl = false;
is_call = false;
PC?exec();
instq?push_back(new_inst);
var inst = instq[-1];
if(in_init) {
// During initialization:
// Correct instruction status from fuQ data.
inst.ftype = init_ftype(fuQ[+0]);
inst.ftime = fuQ[+0].ftime;
fuQ?pop_front();
in_init = fuQ?length() > 0;
if(is_jmpl) {
if(jmpQ?length() > 0)
nPC2 = jmpQ?pop_front();
}
} else if(is_jmpl) {
// Can only fetch one more instruction at this time.
if(num_to_fetch > 1) num_to_fetch = 1;
}
if(is_trap) {
// Trapping instructions must retire before more are fetched
assert(!in_init);
if(inst.ftime <= 0?cvt(uchar))
is_trap = false; // trap already executed
else {
inst.trap = true;
num_to_fetch = 0;
}
}
// Flag if we found a CCTI, JMPL, or CALL.
if(is_ccti || is_jmpl || is_call) finish = true;
PC = nPC; nPC = nPC2;
// At this point PC & nPC point to the
// next two istructions to be fetched.
//
// Mark instruction before the first sequential sequence
// of instructions following a CCTI, JMPL, or CALL. These
// are the points where we want to return from main (when
// the instruction retires).
if(finish) if(PC+4 == nPC)
{ inst.done = true; finish = false; }
}
assert(fuQ?length() == 0 && jmpQ?length() == 0);
//
// Execute ready instructions:
no_loads = true;
unk_load_addr = false;
unk_store_addr = false;
// flags for branch behaviour (doubles for delay slots)
val branch_inum = 0; val _branch_inum = 0;
is_spec = false; val _is_spec = false;
val do_rollback = false; val _do_rollback = false;
val ii = 0; fu_clear();
while(ii < instq?length()) {
var inst = instq[+ii];
// Execute the instruction (if ready)
rollback = false;
val executed = execute(inst,ii);
// Shift the branch control flags
branch_inum = _branch_inum; is_spec = _is_spec;
do_rollback = _do_rollback;
_do_rollback = rollback;
if(rollback) _branch_inum = ii; // branch to rollback
val op = inst.op?cvt(ulong);
if(OP_LOAD_BEGIN <= op && op <= OP_LOAD_END) {
no_loads = false; // flag when we see a load
if(inst.ftype == FU_ADDR?cvt(uchar)) {
// Load address not yet computed
unk_load_addr = true;
}
} else if((OP_STORE_BEGIN <= op && op <= OP_STORE_END) ||
(OP_SWAP_BEGIN <= op && op <= OP_SWAP_END)) {
if(inst.ftype == FU_ADDR?cvt(uchar)) {
// Store address not yet computed
unk_store_addr = true;
}
} else if(OP_BRANCH_BEGIN <= op && op <= OP_BRANCH_END) {
// Flag if path becomes speculative
if(inst.ftime > 0?cvt(uchar)) _is_spec = true;
switch(inst.pc) {
case (pat a==0b1): // branch is annulled
branch_inum = _branch_inum; is_spec = _is_spec;
do_rollback = _do_rollback; _do_rollback = false;
default: // branch not annulled
;
}
}
ii = ii + 1;
if(do_rollback) break;
}
if(_do_rollback) {
// Delay slot not fetched yet.
assert(!do_rollback);
branch_inum = _branch_inum;
do_rollback = true;
}
if(do_rollback) {
// Branch Mispredict detected! ROLLBACK!!!
//
// To rollback we will remove all instructions, from the
// mispredicted branch onward, from the out-of-order
// queue. By putting the simulator back in init mode (set
// in_init to true) and using the fuQ, we will force the
// instruction fetch code above to refetch the branch (and
// perhaps the branch delay slot). The branch (and delay
// slot instruction) will get re-decoded, but this time
// the branch will be set to go in the corrected
// direction.
in_init = true; // return to init mode
var branch_inst = instq[+branch_inum];
PC = branch_inst.pc; // reset PC to refetch branch
nPC = branch_inst.npc; // reset nPC to delay slot
val fu : FU_Data;
// Record the FU_Data for the branch.
fu.ftype = branch_inst.ftype | (!branch_inst.taken)?ext(8)<<7;
fu.ftime = 0?cvt(uchar); fuQ?push_back(fu);
assert(ii > branch_inum && ii <= branch_inum + 2);
val jj = branch_inum + 1;
while(jj < ii) {
var inst = instq[+jj];
fu.ftype = inst.taken?ext(8)<<7 | inst.ftype;
fu.ftime = inst.ftime;
fuQ?push_back(fu);
jj = jj + 1;
}
// assert(jj == ii);
// Rollback discarded stores
while(jj < instq?length()) {
val op = instq[+jj].op?cvt(ulong);
if((OP_STORE_BEGIN <= op && op <= OP_STORE_END) ||
(OP_SWAP_BEGIN <= op && op <= OP_SWAP_END)) {
if(instq[+jj].ftype == FU_STORE?cvt(uchar)) {
cache_store_rollback(jj);
}
}
jj = jj + 1;
}
// Rollback rename map
rmap_rollback(ii);
// Clear the instq from the branch onward.
while(branch_inum < instq?length()) {
var inst = instq[-1];
CWP = ((CWP?cvt(char) + NWINDOWS?cvt(char) - inst.delta)
% NWINDOWS?cvt(char))?bits(5);
if(!inst.trap) {
CANSAVE = ((CANSAVE?cvt(char) + NWINDOWS?cvt(char) +
inst.delta) % NWINDOWS?cvt(char))?bits(5);
CANRESTORE = ((CANRESTORE?cvt(char) + NWINDOWS?cvt(char) -
inst.delta) % NWINDOWS?cvt(char))?bits(5);
} else is_trap = false;
instq?pop_back();
}
}
// Retire instructions with ftime==0 that are older than
// the first instruction having ftime > 0 (ie, still executing).
val num_retired = 0;
while(num_retired < instq?length()) {
var inst = instq[+num_retired];
if(inst.ftime > 0?cvt(uchar)) break;
#ifdef MEMOIZE
// Do we exit from main?
if(inst.done) done = true;
#endif
// Retire oldest instruction
CWP0 = ((CWP0?cvt(char) + NWINDOWS?cvt(char) + inst.delta)
% NWINDOWS?cvt(char))?bits(5);
val op = inst.op?cvt(ulong);
if(OP_BRANCH_BEGIN <= op && op <= OP_BRANCH_END)
branch_direction(inst.pc?addr,inst.taken);
num_retired = num_retired + 1;
}
if(num_retired > 0) {
rmap_retire(num_retired);
cache_retire(num_retired);
instq?pop_front(num_retired);
num_insts_retired = (num_insts_retired +
num_retired?cvt(unsigned[64]));
}
CPU_cycle = CPU_cycle + 1?ext(64);
}
///////////////////////////////////////////////////////////////////////////
// Exiting main() to generate a new memoization index entry.
// The following statements set the init variable, providing
// the correct arguments to the next call to main().
//
//
// Set PC & nPC fields of the init variable
if(instq?length() > 0) {
var inst = instq[+0];
pc1 = inst.pc; npc1 = inst.npc;
} else { pc1 = PC; npc1 = nPC; }
cwp1 = CWP0; // Set CWP field in init variable
//
// Construct fuQ & jmpQ
while(instq?length() > 0) {
var inst = instq[-1];
// Add entry to fuQ for instruction
val fu = struct { ftype = inst.taken?ext(8)<<7 | inst.ftype,
ftime = inst.ftime };
fuQ?push_front(fu);
switch(inst.pc) {
case (pat jmpl || retrn):
if(inst.ftime <= 0?cvt(uchar)) {
// Add entry to jmpQ for JMPL or RETRN instructions
val target = (rmap_read(inst.srcq[+0],instq?length()-1) +
rmap_read(inst.srcq[+1],instq?length()-1));
jmpQ?push_front(target?cvt(stream));
}
default: ;
}
if(!inst.trap) {
CANSAVE = ((CANSAVE?cvt(char) + NWINDOWS?cvt(char) +
inst.delta) % NWINDOWS?cvt(char))?bits(5);
CANRESTORE = ((CANRESTORE?cvt(char) + NWINDOWS?cvt(char) -
inst.delta) % NWINDOWS?cvt(char))?bits(5);
}
instq?pop_back(); // delete instq entry
}
cansave1 = CANSAVE;
canrestore1 = CANRESTORE;
}