/******************************************************************************
** FILE: dynamic.fs
** Dynamic execution engine simulator.
*/

#include "param.h"
#include "instq.fs"
#include "sparc_v9_decode.fs"
#include "execute.fs"
#include "rename_init.fs"
#include "cache.fs"

// Running count of retired instructions (incremented in main()).
extern num_insts_retired : unsigned[64];
val num_insts_retired = 0?cvt(unsigned[64]);

// Running count of simulated cycles; main() adds one per outer-loop iteration.
extern CPU_cycle : unsigned[64];
val CPU_cycle = 0?cvt(unsigned[64]);

//
// Type of data in each fuQ entry.
//   ftype : bits 0-6 hold the instruction's ftype, bit 7 holds the
//           branch "taken" flag (packed as  taken?ext(8)<<7 | ftype).
//   ftime : the instruction's ftime value.
type FU_Data = struct { ftype : uchar, ftime : uchar };

// Extract the "taken" flag (bit 7) from a packed fuQ entry.
// The flag is set exactly when the packed byte exceeds 0x7f.  (Comparing
// against 0x07 would misreport any entry whose ftype code is above 7 as
// a taken branch.)
fun init_taken(fu) { return fu.ftype > 0x7f; }

// Extract the ftype code (bits 0-6) from a packed fuQ entry.
fun init_ftype(fu) { return fu.ftype & 0x7f; }

//
// Simulator initialization
//
// InitType is the argument tuple for main():
//   (pc, npc, cwp, cansave, canrestore, fuQ, jmpQ)
type InitType = (stream,stream,cwp_t,cwp_t,cwp_t,FU_Data queue,stream queue);

// Initialize the rename map, cache, branch state and registers, then
// return the initial argument tuple for main(): start PC, start PC + 4,
// CWP 0, CANSAVE NWINDOWS-2, CANRESTORE 0 and two empty queues.
fun initialize() : InitType
{
  rmap_init();
  cache_init();
  branch_init();
  regs_init(system?start_sp);
  return (system?start_pc,system?start_pc+4,0b0?ext(5),
          (NWINDOWS-2)?cvt(cwp_t),0b0?ext(5),
          queue{},queue{});
}

val init : InitType = initialize();

//
// Simulator main() function
//
// Arguments (all passed by reference and rewritten before returning so
// that they describe the state for the next call):
//   pc1, npc1     PC / nPC of the oldest instruction
//   cwp1          CWP before the oldest instruction
//   cansave1      CANSAVE value
//   canrestore1   CANRESTORE value
//   fuQ           one FU_Data entry per in-flight instruction to re-fetch
//   jmpQ          queued nPC values consumed for re-fetched jmpl/retrn
//                 instructions
//
// Each iteration of the outer loop (one CPU_cycle increment) performs:
// fetch/decode, execute, rollback on branch mispredict, and retire.
fun main(var pc1, var npc1, var cwp1, var cansave1, var canrestore1,
         var fuQ : FU_Data queue, var jmpQ : stream queue)
{
  PC = pc1;                     // PC of oldest instruction
  nPC = npc1;                   // PC of second oldest instruction
  CWP0 = cwp1;                  // CWP before oldest instruction
  CWP = CWP0;                   // CWP after newest instruction
  CANSAVE = cansave1;           // CANSAVE after newest instruction
  CANRESTORE = canrestore1;     // CANRESTORE after newest instruction
  instq?clear();                // clear instq; make it rt-stat

  // Initialize these here to make them (rt-)static for as long as possible
  nPC2 = 0?cvt(stream);
  is_ccti = false;
  is_jmpl = false;
  is_call = false;
  rollback = false;
  fu_num_allocated = array(FU_END1) { 0 };
  is_spec = false;
  no_loads = true;
  unk_load_addr = false;
  unk_store_addr = false;
  new_inst = struct {
    pc = 0?cvt(stream), npc = 0?cvt(stream),
    srcq = queue{}, destq = queue{},
    op = 0?cvt(ushort), ftype = 0?cvt(uchar), ftime = 0?cvt(uchar),
    taken = false, delta = 0?cvt(char), trap = false, done = false,
    align = 0?cvt(uchar)
  };

  //
  // Flags to control execution
  is_trap = false;              // flag if trapping instruction in pipe
  in_init = fuQ?length()>0;     // flag to re-fetch instructions
  is_taken = false;
  val done = false;             // flag to trigger exit from main()

  // Flag to help find and label instructions
  // that cause main to exit when they retire.
  val finish = false;

  ///////////////////////////////////////////////////////////////////////////
  // Loop until we want to generate a new index.
  while(!done) {
    // Maximum number of instructions to fetch & decode
    val num_to_fetch = FETCH_WIDTH;
    if(is_trap) num_to_fetch = 0;
    else if(in_init) num_to_fetch = num_to_fetch + fuQ?length();

    // Allow at most OOLIMIT instructions into the out-of-order queue
    if(num_to_fetch > (OOLIMIT - instq?length()))
      num_to_fetch = OOLIMIT - instq?length();

    ///////////////////////////////////////////////////////////////////////
    // Loop to fetch instructions.  At the start of any call to main,
    // this loop re-fetches & decodes all instructions in flight.
    // On subsequent iterations this loop fetches new instructions.
    while(num_to_fetch > 0) {
      if(instq?length() > 1) {
        switch(instq[-2].pc) {
        case (pat jmpl || retrn):
          // We cannot fetch instructions beyond the delay slot of
          // an indirect jump until the jump address is determined
          if(instq[-2].ftime > 0?cvt(uchar)) break; // break while

          var inst = instq[-1]; // instruction in delay slot

          // When ready, update PC to jump target
          // NOTE(review): inum is not referenced again in this block.
          val inum = instq?length() - 2;
          PC = inst.npc;

          // If there is no taken branch in the delay slot, then
          // update nPC to the instruction following the jmp target
          if(!inst.taken) nPC = PC + 4;
        default: ;
        }
      }

      nPC2 = nPC + 4;           // predict the next nPC value

      //
      // Fetch next instruction
      num_to_fetch = num_to_fetch - 1;
      init_inst(new_inst,PC,nPC);
      if(!in_init) rmap_fetch();
      else {
        // If in_init => update branch taken flag before decoding
        is_taken = init_taken(fuQ[+0]);
      }

      //
      // Decode instruction; Use instruction semantics.
      is_ccti = false;
      is_jmpl = false;
      is_call = false;
      PC?exec();
      instq?push_back(new_inst);
      var inst = instq[-1];

      if(in_init) {
        // During initialization:
        // Correct instruction status from fuQ data.
        inst.ftype = init_ftype(fuQ[+0]);
        inst.ftime = fuQ[+0].ftime;
        fuQ?pop_front();
        in_init = fuQ?length() > 0;

        if(is_jmpl) {
          if(jmpQ?length() > 0) nPC2 = jmpQ?pop_front();
        }
      } else if(is_jmpl) {
        // Can only fetch one more instruction at this time.
        if(num_to_fetch > 1) num_to_fetch = 1;
      }

      if(is_trap) {
        // Trapping instructions must retire before more are fetched
        assert(!in_init);
        if(inst.ftime <= 0?cvt(uchar))
          is_trap = false;      // trap already executed
        else {
          inst.trap = true;
          num_to_fetch = 0;
        }
      }

      // Flag if we found a CCTI, JMPL, or CALL.
      if(is_ccti || is_jmpl || is_call) finish = true;

      PC = nPC;
      nPC = nPC2;
      // At this point PC & nPC point to the
      // next two instructions to be fetched.

      //
      // Mark instruction before the first sequential sequence
      // of instructions following a CCTI, JMPL, or CALL.  These
      // are the points where we want to return from main (when
      // the instruction retires).
      if(finish) if(PC+4 == nPC) {
        inst.done = true;
        finish = false;
      }
    }
    assert(fuQ?length() == 0 && jmpQ?length() == 0);

    //
    // Execute ready instructions:
    no_loads = true;
    unk_load_addr = false;
    unk_store_addr = false;

    // flags for branch behaviour (doubles for delay slots)
    // The underscore-prefixed copy holds the value produced by the current
    // instruction; it is shifted into the plain variable on the next
    // iteration.
    val branch_inum = 0;
    val _branch_inum = 0;
    is_spec = false;
    val _is_spec = false;
    val do_rollback = false;
    val _do_rollback = false;

    val ii = 0;
    fu_clear();
    while(ii < instq?length()) {
      var inst = instq[+ii];

      // Execute the instruction (if ready)
      rollback = false;
      val executed = execute(inst,ii);

      // Shift the branch control flags
      branch_inum = _branch_inum;
      is_spec = _is_spec;
      do_rollback = _do_rollback;
      _do_rollback = rollback;
      if(rollback) _branch_inum = ii; // branch to rollback

      val op = inst.op?cvt(ulong);
      if(OP_LOAD_BEGIN <= op && op <= OP_LOAD_END) {
        no_loads = false;       // flag when we see a load
        if(inst.ftype == FU_ADDR?cvt(uchar)) {
          // Load address not yet computed
          unk_load_addr = true;
        }
      } else if((OP_STORE_BEGIN <= op && op <= OP_STORE_END) ||
                (OP_SWAP_BEGIN <= op && op <= OP_SWAP_END)) {
        if(inst.ftype == FU_ADDR?cvt(uchar)) {
          // Store address not yet computed
          unk_store_addr = true;
        }
      } else if(OP_BRANCH_BEGIN <= op && op <= OP_BRANCH_END) {
        // Flag if path becomes speculative
        if(inst.ftime > 0?cvt(uchar)) _is_spec = true;

        switch(inst.pc) {
        case (pat a==0b1):
          // branch is annulled
          // Shift the flags again at once instead of on the next
          // iteration.
          branch_inum = _branch_inum;
          is_spec = _is_spec;
          do_rollback = _do_rollback;
          _do_rollback = false;
        default:
          // branch not annulled
          ;
        }
      }

      ii = ii + 1;
      if(do_rollback) break;
    }

    if(_do_rollback) {
      // Delay slot not fetched yet.
      assert(!do_rollback);
      branch_inum = _branch_inum;
      do_rollback = true;
    }

    if(do_rollback) {
      // Branch Mispredict detected!  ROLLBACK!!!
      //
      // To rollback we will remove all instructions, from the
      // mispredicted branch onward, from the out-of-order
      // queue.  By putting the simulator back in init mode (set
      // in_init to true) and using the fuQ, we will force the
      // instruction fetch code above to refetch the branch (and
      // perhaps the branch delay slot).  The branch (and delay
      // slot instruction) will get re-decoded, but this time
      // the branch will be set to go in the corrected
      // direction.
      in_init = true;           // return to init mode

      var branch_inst = instq[+branch_inum];
      PC = branch_inst.pc;      // reset PC to refetch branch
      nPC = branch_inst.npc;    // reset nPC to delay slot

      val fu : FU_Data;

      // Record the FU_Data for the branch.
      // The taken flag is inverted so the re-fetched branch goes the
      // corrected way; ftime is recorded as 0.
      fu.ftype = branch_inst.ftype | (!branch_inst.taken)?ext(8)<<7;
      fu.ftime = 0?cvt(uchar);
      fuQ?push_back(fu);

      assert(ii > branch_inum && ii <= branch_inum + 2);

      // Record FU_Data (unchanged) for instructions between the branch
      // and ii, i.e. at most one instruction given the assert above.
      val jj = branch_inum + 1;
      while(jj < ii) {
        var inst = instq[+jj];
        fu.ftype = inst.taken?ext(8)<<7 | inst.ftype;
        fu.ftime = inst.ftime;
        fuQ?push_back(fu);
        jj = jj + 1;
      }
      // assert(jj == ii);

      // Rollback discarded stores
      while(jj < instq?length()) {
        val op = instq[+jj].op?cvt(ulong);
        if((OP_STORE_BEGIN <= op && op <= OP_STORE_END) ||
           (OP_SWAP_BEGIN <= op && op <= OP_SWAP_END)) {
          if(instq[+jj].ftype == FU_STORE?cvt(uchar)) {
            cache_store_rollback(jj);
          }
        }
        jj = jj + 1;
      }

      // Rollback rename map
      rmap_rollback(ii);

      // Clear the instq from the branch onward.
      // Instructions are popped newest-first; each one's delta is undone
      // on CWP, and on CANSAVE/CANRESTORE unless the instruction is
      // flagged as a trap.
      while(branch_inum < instq?length()) {
        var inst = instq[-1];
        CWP = ((CWP?cvt(char) + NWINDOWS?cvt(char) - inst.delta)
               % NWINDOWS?cvt(char))?bits(5);
        if(!inst.trap) {
          CANSAVE = ((CANSAVE?cvt(char) + NWINDOWS?cvt(char) + inst.delta)
                     % NWINDOWS?cvt(char))?bits(5);
          CANRESTORE = ((CANRESTORE?cvt(char) + NWINDOWS?cvt(char) - inst.delta)
                        % NWINDOWS?cvt(char))?bits(5);
        } else is_trap = false;
        instq?pop_back();
      }
    }

    // Retire instructions with ftime==0 that are older than
    // the first instruction having ftime > 0 (ie, still executing).
    val num_retired = 0;
    while(num_retired < instq?length()) {
      var inst = instq[+num_retired];
      if(inst.ftime > 0?cvt(uchar)) break;

#ifdef MEMOIZE
      // Do we exit from main?
      if(inst.done) done = true;
#endif

      // Retire oldest instruction
      CWP0 = ((CWP0?cvt(char) + NWINDOWS?cvt(char) + inst.delta)
              % NWINDOWS?cvt(char))?bits(5);

      val op = inst.op?cvt(ulong);
      if(OP_BRANCH_BEGIN <= op && op <= OP_BRANCH_END)
        branch_direction(inst.pc?addr,inst.taken);

      num_retired = num_retired + 1;
    }

    if(num_retired > 0) {
      rmap_retire(num_retired);
      cache_retire(num_retired);
      instq?pop_front(num_retired);
      num_insts_retired =
        (num_insts_retired + num_retired?cvt(unsigned[64]));
    }

    CPU_cycle = CPU_cycle + 1?ext(64);
  }

  ///////////////////////////////////////////////////////////////////////////
  // Exiting main() to generate a new memoization index entry.
  // The following statements set the init variable, providing
  // the correct arguments to the next call to main().
  //

  //
  // Set PC & nPC fields of the init variable
  if(instq?length() > 0) {
    var inst = instq[+0];
    pc1 = inst.pc;
    npc1 = inst.npc;
  } else {
    pc1 = PC;
    npc1 = nPC;
  }

  cwp1 = CWP0;                  // Set CWP field in init variable

  //
  // Construct fuQ & jmpQ
  // instq is drained newest-first and entries are pushed at the front,
  // so both queues end up in oldest-first order.
  while(instq?length() > 0) {
    var inst = instq[-1];

    // Add entry to fuQ for instruction
    val fu = struct {
      ftype = inst.taken?ext(8)<<7 | inst.ftype,
      ftime = inst.ftime
    };
    fuQ?push_front(fu);

    switch(inst.pc) {
    case (pat jmpl || retrn):
      if(inst.ftime <= 0?cvt(uchar)) {
        // Add entry to jmpQ for JMPL or RETRN instructions
        val target = (rmap_read(inst.srcq[+0],instq?length()-1) +
                      rmap_read(inst.srcq[+1],instq?length()-1));
        jmpQ?push_front(target?cvt(stream));
      }
    default: ;
    }

    if(!inst.trap) {
      CANSAVE = ((CANSAVE?cvt(char) + NWINDOWS?cvt(char) + inst.delta)
                 % NWINDOWS?cvt(char))?bits(5);
      CANRESTORE = ((CANRESTORE?cvt(char) + NWINDOWS?cvt(char) - inst.delta)
                    % NWINDOWS?cvt(char))?bits(5);
    }

    instq?pop_back();           // delete instq entry
  }

  cansave1 = CANSAVE;
  canrestore1 = CANRESTORE;
}