/******************************************************************************
** FILE: dynamic.fs
** Dynamic execution engine simulator.
*/

#include "param.h"
#include "instq.fs"
#include "sparc_v9_decode.fs"
#include "execute.fs"
#include "rename_init.fs"
#include "cache.fs"

extern num_insts_retired : unsigned[64];
val num_insts_retired = 0?cvt(unsigned[64]);

extern CPU_cycle : unsigned[64];
val CPU_cycle = 0?cvt(unsigned[64]);

//
// Type of data in each fuQ entry.

type FU_Data = struct { ftype : uchar, ftime : uchar };
fun init_taken(fu) { return fu.ftype > 0x07; }
fun init_ftype(fu) { return fu.ftype & 0x7f; }

//
// Simulator initialization

type InitType = (stream,stream,cwp_t,cwp_t,cwp_t,FU_Data queue,stream queue);

fun initialize() : InitType
{
    rmap_init();
    cache_init();
    branch_init();
    regs_init(system?start_sp);
    return (system?start_pc,system?start_pc+4,0b0?ext(5),
	    (NWINDOWS-2)?cvt(cwp_t),0b0?ext(5),
	    queue{},queue{});
}

val init : InitType = initialize();

//
// Sumilator main() function

fun main(var pc1, var npc1, var cwp1,
	 var cansave1, var canrestore1,
	 var fuQ : FU_Data queue,
	 var jmpQ : stream queue)
{
    PC = pc1;			// PC of oldest instruction
    nPC = npc1;			// PC of second oldest instruction
    CWP0 = cwp1;		// CWP before oldest instruction
    CWP = CWP0;			// CWP after newest instruction
    CANSAVE = cansave1;		// CANSAVE after newest instruction
    CANRESTORE = canrestore1;	// CANRESTORE after newest instruction

    instq?clear();		// clear instq; make it rt-stat

    // Initialize these here to make them (rt-)static for as long as possible
    nPC2 = 0?cvt(stream);
    is_ccti = false;
    is_jmpl = false;
    is_call = false;
    rollback = false;
    fu_num_allocated = array(FU_END1) { 0 };
    is_spec = false;
    no_loads = true;
    unk_load_addr = false;
    unk_store_addr = false;
    new_inst = struct { pc = 0?cvt(stream), npc = 0?cvt(stream),
			srcq = queue{}, destq = queue{},
			op = 0?cvt(ushort), ftype = 0?cvt(uchar),
			ftime = 0?cvt(uchar), taken = false,
			delta = 0?cvt(char), trap = false,
			done = false, align = 0?cvt(uchar) };

    //
    // Flags to control execution

    is_trap = false;			// flag if trapping instruction in pipe
    in_init = fuQ?length()>0;		// flag to re-fetch instructions
    is_taken = false;

    val done = false;	// flag to trigger exit from main()

    // Flag to help find and label instructions
    // that cause main to exit when they retire.
    val finish = false;

    ///////////////////////////////////////////////////////////////////////////
    // Loop until we want to generate a new index.

    while(!done) {

	// Maximum number of instructions to fetch & decode
	val num_to_fetch = FETCH_WIDTH;
	if(is_trap) num_to_fetch = 0;
	else if(in_init) num_to_fetch = num_to_fetch + fuQ?length();

	// Allow at most OOLIMIT instructions into the out-of-order queue
	if(num_to_fetch > (OOLIMIT - instq?length()))
	    num_to_fetch = OOLIMIT - instq?length();

	///////////////////////////////////////////////////////////////////////
	// Loop to fetch instructions. At the start of any call to main,
	// this loop re-fetches & decodes all instructions in flight.
	// On subsequent iterations this loop fetches new instructions.

	while(num_to_fetch > 0) {

	    if(instq?length() > 1) {
		switch(instq[-2].pc) {
		 case (pat jmpl || retrn):
		    // We cannot fetch instructions beyond the delay slot of
		    // an indirect jump until the jump address is determined
		    if(instq[-2].ftime > 0?cvt(uchar)) break;	// break while

		    var inst = instq[-1];	// instruction in delay slot

		    // When ready, update PC to jump target
		    val inum = instq?length() - 2;
		    PC = inst.npc;

		    // If there is no taken branch in the delay slot, then
		    // update nPC to the instruction following the jmp target
		    if(!inst.taken) nPC = PC + 4;

		 default: ;
		}
	    }

	    nPC2 = nPC + 4;	// predict the next nPC value

	    //
	    // Fetch next instruction

	    num_to_fetch = num_to_fetch - 1;

	    init_inst(new_inst,PC,nPC);

	    if(!in_init) rmap_fetch();
	    else {
		// If in_init => update branch take flag before deocoding
		is_taken = init_taken(fuQ[+0]);
	    }

	    //
	    // Decode instruction; Use instruction semanics.

	    is_ccti = false;
	    is_jmpl = false;
	    is_call = false;

	    PC?exec();

	    instq?push_back(new_inst);
	    var inst = instq[-1];

	    if(in_init) {
		// During initialization:
		// Correct instruction status from fuQ data.
		inst.ftype = init_ftype(fuQ[+0]);
		inst.ftime = fuQ[+0].ftime;
		fuQ?pop_front();

		in_init = fuQ?length() > 0;

		if(is_jmpl) {
		    if(jmpQ?length() > 0)
			nPC2 = jmpQ?pop_front();
		}
	    } else if(is_jmpl) {
		// Can only fetch one more instruction at this time.
		if(num_to_fetch > 1) num_to_fetch = 1;
	    }

	    if(is_trap) {
		// Trapping instructions must retire before more are fetched
		assert(!in_init);
		if(inst.ftime <= 0?cvt(uchar))
		    is_trap = false;	// trap already executed
		else {
		    inst.trap = true;
		    num_to_fetch = 0;
		}
	    }

	    // Flag if we found a CCTI, JMPL, or CALL.
	    if(is_ccti || is_jmpl || is_call) finish = true;

	    PC = nPC; nPC = nPC2;
	    // At this point  PC & nPC point to the
	    // next two istructions to be fetched.

	    //
	    // Mark instruction before the first sequential sequence
	    // of instructions following a CCTI, JMPL, or CALL. These
	    // are the points where we want to return from main (when
	    // the instruction retires).

	    if(finish) if(PC+4 == nPC)
	    { inst.done = true; finish = false; }
        }

	assert(fuQ?length() == 0 && jmpQ?length() == 0);

	//
	// Execute ready instructions:

	no_loads = true;
	unk_load_addr = false;
	unk_store_addr = false;

	// flags for branch behaviour	(doubles for delay slots)
	val branch_inum = 0;		val _branch_inum = 0;
	is_spec = false;		val _is_spec = false;
	val do_rollback = false;	val _do_rollback = false;

	val ii = 0; fu_clear();
	while(ii < instq?length()) {
	    var inst = instq[+ii];

	    // Execute the instruction (if ready)
	    rollback = false;
	    val executed = execute(inst,ii);

	    // Shift the branch control flags
	    branch_inum = _branch_inum; is_spec = _is_spec;
	    do_rollback = _do_rollback;
	    _do_rollback = rollback;
	    if(rollback) _branch_inum = ii;	// branch to rollback

	    val op = inst.op?cvt(ulong);
	    if(OP_LOAD_BEGIN <= op && op <= OP_LOAD_END) {
		no_loads = false;		// flag when we see a load
		if(inst.ftype == FU_ADDR?cvt(uchar)) {
		    // Load address not yet computed
		    unk_load_addr = true;
		}

	    } else if((OP_STORE_BEGIN <= op && op <= OP_STORE_END) ||
		      (OP_SWAP_BEGIN <= op && op <= OP_SWAP_END)) {
		if(inst.ftype == FU_ADDR?cvt(uchar)) {
		    // Store address not yet computed
		    unk_store_addr = true;
		}

	    } else if(OP_BRANCH_BEGIN <= op && op <= OP_BRANCH_END) {

		// Flag if path becomes speculative
		if(inst.ftime > 0?cvt(uchar)) _is_spec = true;

		switch(inst.pc) {
		 case (pat a==0b1):	// branch is annulled
		    branch_inum = _branch_inum; is_spec = _is_spec;
		    do_rollback = _do_rollback; _do_rollback = false;
		 default:		// branch not annulled
		    ;
		}
	    }

	    ii = ii + 1;

	    if(do_rollback) break;
	}

	if(_do_rollback) {
	    // Delay slot not fetched yet.
	    assert(!do_rollback);
	    branch_inum = _branch_inum;
	    do_rollback = true;
	}

	if(do_rollback) {
	    // Branch Mispredict detected! ROLLBACK!!!
	    //
	    // To rollback we will remove all instructions, from the
	    // mispredicted branch onward, from the out-of-order
	    // queue.  By putting the simulator back in init mode (set
	    // in_init to true) and using the fuQ, we will force the
	    // instruction fetch code above to refetch the branch (and
	    // perhaps the branch delay slot). The branch (and delay
	    // slot instruction) will get re-decoded, but this time
	    // the branch will be set to go in the corrected
	    // direction.

	    in_init = true;			// return to init mode

	    var branch_inst = instq[+branch_inum];
	    PC = branch_inst.pc;		// reset PC to refetch branch
	    nPC = branch_inst.npc;		// reset nPC to delay slot

	    val fu : FU_Data;

	    // Record the FU_Data for the branch.
	    fu.ftype = branch_inst.ftype | (!branch_inst.taken)?ext(8)<<7;
	    fu.ftime = 0?cvt(uchar); fuQ?push_back(fu);

	    assert(ii > branch_inum && ii <= branch_inum + 2);
	    val jj = branch_inum + 1;
	    while(jj < ii) {
		var inst = instq[+jj];
		fu.ftype = inst.taken?ext(8)<<7 | inst.ftype;
		fu.ftime = inst.ftime;
		fuQ?push_back(fu);
		jj = jj + 1;
	    }
	    // assert(jj == ii);

	    // Rollback discarded stores
	    while(jj < instq?length()) {
		val op = instq[+jj].op?cvt(ulong);
		if((OP_STORE_BEGIN <= op && op <= OP_STORE_END) ||
		   (OP_SWAP_BEGIN <= op && op <= OP_SWAP_END)) {
		    if(instq[+jj].ftype == FU_STORE?cvt(uchar)) {
			cache_store_rollback(jj);
		    }
		}
		jj = jj + 1;
	    }

	    // Rollback rename map
	    rmap_rollback(ii);

	    // Clear the instq from the branch onward.
	    while(branch_inum < instq?length()) {
		var inst = instq[-1];
		CWP = ((CWP?cvt(char) + NWINDOWS?cvt(char) - inst.delta)
		       % NWINDOWS?cvt(char))?bits(5);
		if(!inst.trap) {
		    CANSAVE = ((CANSAVE?cvt(char) + NWINDOWS?cvt(char) +
				inst.delta) % NWINDOWS?cvt(char))?bits(5);
		    CANRESTORE = ((CANRESTORE?cvt(char) + NWINDOWS?cvt(char) -
				   inst.delta) % NWINDOWS?cvt(char))?bits(5);
		} else is_trap = false;
		instq?pop_back();
	    }
	}

	// Retire instructions with ftime==0 that are older than
	// the first instruction having ftime > 0 (ie, still executing).
	val num_retired = 0;
	while(num_retired < instq?length()) {
	    var inst = instq[+num_retired];
	    if(inst.ftime > 0?cvt(uchar)) break;

#ifdef MEMOIZE
	    // Do we exit from main?
	    if(inst.done) done = true;
#endif

	    // Retire oldest instruction
	    CWP0 = ((CWP0?cvt(char) + NWINDOWS?cvt(char) + inst.delta)
		    % NWINDOWS?cvt(char))?bits(5);

	    val op = inst.op?cvt(ulong);
	    if(OP_BRANCH_BEGIN <= op && op <= OP_BRANCH_END)
		branch_direction(inst.pc?addr,inst.taken);

	    num_retired = num_retired + 1;
	}

	if(num_retired > 0) {
	    rmap_retire(num_retired);
	    cache_retire(num_retired);
	    instq?pop_front(num_retired);
	    num_insts_retired = (num_insts_retired +
				 num_retired?cvt(unsigned[64]));
	}

	CPU_cycle = CPU_cycle + 1?ext(64);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Exiting main() to generate a new memoization index entry.
    // The following statements set the init variable, providing
    // the correct arguments to the next call to main().
    //

    //
    // Set PC & nPC fields of the init variable

    if(instq?length() > 0) {
	var inst = instq[+0];
	pc1 = inst.pc; npc1 = inst.npc;
    } else { pc1 = PC; npc1 = nPC; }

    cwp1 = CWP0;	// Set CWP field in init variable

    //
    // Construct fuQ & jmpQ

    while(instq?length() > 0) {
	var inst = instq[-1];

	// Add entry to fuQ for instruction
	val fu = struct { ftype = inst.taken?ext(8)<<7 | inst.ftype,
			  ftime = inst.ftime };
	fuQ?push_front(fu);

	switch(inst.pc) {
	 case (pat jmpl || retrn):
	    if(inst.ftime <= 0?cvt(uchar)) {
		// Add entry to jmpQ for JMPL or RETRN instructions
		val target = (rmap_read(inst.srcq[+0],instq?length()-1) +
			      rmap_read(inst.srcq[+1],instq?length()-1));
		jmpQ?push_front(target?cvt(stream));
	    }
	 default: ;
	}

	if(!inst.trap) {
	    CANSAVE = ((CANSAVE?cvt(char) + NWINDOWS?cvt(char) +
			inst.delta) % NWINDOWS?cvt(char))?bits(5);
	    CANRESTORE = ((CANRESTORE?cvt(char) + NWINDOWS?cvt(char) -
			   inst.delta) % NWINDOWS?cvt(char))?bits(5);
	}

	instq?pop_back();	// delete instq entry
    }

    cansave1 = CANSAVE;
    canrestore1 = CANRESTORE;
}