#ifndef _execute_fs
#define _execute_fs
/******************************************************************************
** FILE: execute.fs
** This file defines the execute() function to evaluate instruction operations.
*/

#include "ops.h"
#include "sparc_v9_regs.fs"
#include "instq.fs"
#include "flags.fs"
#include "funits.fs"
#include "rename_exec.fs"

// Register-window helpers implemented outside this file.  execute() calls
// save_regs() from OP_SAVE and restore_regs() from OP_RESTORE / OP_RETRN,
// in both cases only when the instruction's trap flag is set.
extern save_regs(cwp_t,cwp_t) : void;
extern restore_regs(cwp_t) : void;

// Called by the trap() helper inside execute() for trap number 3
// (which is what OP_FLUSHW raises).
extern flush_windows(cwp_t,cwp_t) : void;

// Generic trap handler: the trap() helper inside execute() forwards every
// trap number other than 3 here, after adding 256 to it.
extern trap_sparc(ulong,cwp_t,cwp_t): void;

// Trap numbers (32-bit) handed to the trap() helper inside execute():
//   TAG_OVERFLOW - raised by OP_TADDCCTV / OP_TSUBCCTV when the V bit is set
//   INVALID      - raised by OP_FCMPES / OP_FCMPED on an unordered result
#define TAG_OVERFLOW	0x23?ext(32)
#define INVALID		0x21?ext(32)

// Invoked by execute() for OP_ILL.
extern no_instruction() : void;

// Register-value predicates used by the branch-on-register (OP_BRxx) and
// move-on-register (OP_MOVRxx) cases of execute().  `vv` is the 64-bit
// register contents.
//
// The equality tests are sign-agnostic.  The ordering tests must treat
// `vv` as a two's-complement signed number, so both operands are converted
// to signed with unary `+` (the same idiom s_div32() uses for its signed
// comparisons).  Callers pass unsigned values; comparing those directly
// against zero would make "less than zero" always false and "greater than
// or equal to zero" always true.
fun _z(vv) { return vv == 0?ext(64); }
fun _lez(vv) { return +vv <= +(0?ext(64)); }
fun _lz(vv) { return +vv < +(0?ext(64)); }
fun _nz(vv) { return vv != 0?ext(64); }
fun _gz(vv) { return +vv > +(0?ext(64)); }
fun _gez(vv) { return +vv >= +(0?ext(64)); }

// Floating-point condition predicates.  Each one inspects the low two bits
// of the condition value `vv`.  Going by the tests below, the encoding is
//   0b00 = equal, 0b01 = less, 0b10 = greater, 0b11 = unordered
// and a predicate is true when the value is any of the relations spelled
// out in its name (u/g/l/e; "ne" = not equal, "o" = ordered).

// unordered
fun f_u(vv) {
    val fcc = vv?bits(2);
    return fcc == 0b11;
}

// greater
fun f_g(vv) {
    val fcc = vv?bits(2);
    return fcc == 0b10;
}

// unordered or greater: exactly the encodings with bit 1 set
fun f_ug(vv) {
    return vv?bit(1);
}

// less
fun f_l(vv) {
    val fcc = vv?bits(2);
    return fcc == 0b01;
}

// unordered or less: exactly the encodings with bit 0 set
fun f_ul(vv) {
    return vv?bit(0);
}

// less or greater
fun f_lg(vv) {
    return vv?bits(2)==0b01 || vv?bits(2)==0b10;
}

// not equal (less, greater or unordered)
fun f_ne(vv) {
    val fcc = vv?bits(2);
    return fcc != 0b00;
}

// equal
fun f_e(vv) {
    val fcc = vv?bits(2);
    return fcc == 0b00;
}

// unordered or equal
fun f_ue(vv) {
    return vv?bits(2)==0b00 || vv?bits(2)==0b11;
}

// greater or equal: exactly the encodings with bit 0 clear
fun f_ge(vv) {
    return !vv?bit(0);
}

// unordered, greater or equal: everything except "less"
fun f_uge(vv) {
    val fcc = vv?bits(2);
    return fcc != 0b01;
}

// less or equal: exactly the encodings with bit 1 clear
fun f_le(vv) {
    return !vv?bit(1);
}

// unordered, less or equal: everything except "greater"
fun f_ule(vv) {
    val fcc = vv?bits(2);
    return fcc != 0b10;
}

// ordered: everything except "unordered"
fun f_o(vv) {
    val fcc = vv?bits(2);
    return fcc != 0b11;
}

// Accessors for the four integer condition flags held in the low bits of a
// condition-code value: carry (bit 0), overflow (bit 1), zero (bit 2) and
// negative (bit 3).
#define C(ccr)	ccr?bit(0)
#define V(ccr)	ccr?bit(1)
#define Z(ccr)	ccr?bit(2)
#define N(ccr)	ccr?bit(3)

// Integer condition predicates built on the flag accessors above.  They
// are used by the Bcc, MOVcc and Tcc cases of execute().

// not equal: Z clear
fun i_ne(vv) {
    return !Z(vv);
}

// equal: Z set
fun i_e(vv) {
    return Z(vv);
}

// signed greater: neither equal nor less (less is N xor V)
fun i_g(vv) {
    val lt = N(vv)^V(vv);
    return !(Z(vv)|lt);
}

// signed less or equal
fun i_le(vv) {
    val lt = N(vv)^V(vv);
    return Z(vv)|lt;
}

// signed greater or equal
fun i_ge(vv) {
    val lt = N(vv)^V(vv);
    return !lt;
}

// signed less: N xor V
fun i_l(vv) {
    val lt = N(vv)^V(vv);
    return lt;
}

// unsigned greater: neither carry nor zero
fun i_gu(vv) {
    val leu = C(vv)|Z(vv);
    return !leu;
}

// unsigned less or equal: carry or zero
fun i_leu(vv) {
    val leu = C(vv)|Z(vv);
    return leu;
}

// carry clear
fun i_cc(vv) {
    return !C(vv);
}

// carry set
fun i_cs(vv) {
    return C(vv);
}

// positive: N clear
fun i_pos(vv) {
    return !N(vv);
}

// negative: N set
fun i_neg(vv) {
    return N(vv);
}

// overflow clear
fun i_vc(vv) {
    return !V(vv);
}

// overflow set
fun i_vs(vv) {
    return V(vv);
}

// Unsigned division helper for OP_UDIV / OP_UDIVCC.
//   x1  - 64-bit dividend
//   x2  - divisor (zero-extended to 64 bits before dividing)
//   ccr - condition codes, passed by reference; bit 1 (V) is OR'ed in when
//         the quotient overflows, otherwise ccr is left untouched
// Returns the 64-bit quotient.  A quotient that does not fit in 32 bits
// (anything above 0xffffffff, which includes exactly 2^32) is replaced by
// 0xffffffff and flagged through V.
// NOTE(review): a zero divisor is not checked here - confirm that callers
// or the division operator itself deal with it.
fun u_div32(x1,x2,var ccr : cc) {
    val xx = x1 / x2?ext(64);
    if(xx > 0xffffffff?ext(64)) {
	xx = 0xffffffff?ext(64);
	ccr = (ccr?bits(8) | 0x02)?cvt(cc);
    }
    return xx;
}

// Signed division helper for OP_SDIV / OP_SDIVCC.
//   x1  - 64-bit dividend (interpreted as signed)
//   x2  - divisor (sign-extended to 64 bits and interpreted as signed)
//   ccr - condition codes, passed by reference; bit 1 (V) is OR'ed in when
//         the quotient overflows, otherwise ccr is left untouched
// Returns the quotient as an unsigned 64-bit bit pattern.  The quotient is
// clamped to the signed 32-bit range:
//   above  0x7fffffff (which includes exactly 2^31, e.g. -2^31 / -1)
//          -> 0x7fffffff, V set
//   below -2^31
//          -> 0x80000000 sign-extended to 64 bits, V set
// NOTE(review): a zero divisor is not checked here - confirm that callers
// or the division operator itself deal with it.
fun s_div32(x1,x2,var ccr : cc) {
    val xx = (+x1 / +x2?sext(64))?cast(unsigned[64]);
    if(+xx > +(0x7fffffff?ext(64))) {
	xx = 0x7fffffff?ext(64);
	ccr = (ccr?bits(8) | 0x02)?cvt(cc);
    } else if(+xx < +((-1)?sext(64)<<31)) {
	xx = 0x80000000?sext(64);
	ccr = (ccr?bits(8) | 0x02)?cvt(cc);
    }
    return xx;
}

// Build the 8-bit condition-code byte for a divide result.
//   xx   - 64-bit result value
//   ccr0 - condition codes to start from; its low 8 bits are kept and the
//          flags computed here are OR'ed on top
// Flags derived from xx:
//   bit 7 - bit 63 of xx is set
//   bit 6 - xx is zero as a 64-bit value
//   bit 3 - bit 31 of xx is set
//   bit 2 - the low 32 bits of xx are zero
fun get_div_mul_cc(xx,ccr0) {
    val neg64 = xx?bit(63)?ext(8) << 7;
    val zero64 = (xx==0?ext(64))?ext(8) << 6;
    val neg32 = xx?bit(31)?ext(8) << 3;
    val zero32 = (xx?bits(32)==0)?ext(8) << 2;
    return ccr0?bits(8) | neg64 | zero64 | neg32 | zero32;
}

//
// execute(inst, inum): advance one in-flight instruction and, once its
// countdown timer expires, carry out its operation.
//
//   inst - the instruction record (passed by reference; ftype and ftime
//          are updated here)
//   inum - presumably the instruction's index in the instruction queue
//          (it is compared against instq?length() below) - TODO confirm
//
// Returns false, without performing the operation, when
//   - the countdown timer is already zero (ftype is then set to FU_DONE),
//   - the instruction is flagged as a trap and inum is greater than 0,
//   - its sources are not ready or no functional unit can be allocated,
//   - the decremented countdown is still non-zero, or
//   - fu_finish() returns false.
// Otherwise the switch on inst.op below is run and true is returned.
//
fun execute(var inst : InstOp, inum)
{
    val ftime = inst.ftime;
    if(ftime <= 0?cvt(uchar)) {
	inst.ftype = FU_DONE?cvt(uchar);
	return false;	// stop if done
    }
    if(inst.trap) { if(inum > 0) return false; }	// in order traps
    if(!srcs_ready(inst.srcq,inum)) return false;	// sources ready?

    val ftype = inst.ftype;
    if(!fu_alloc(ftype)) return false;		// can we allocate a FU?
    ftime = ftime - 1?cvt(uchar);		// decrement countdown timer
    inst.ftime = ftime;
    if(ftime > 0?cvt(uchar)) return false;	// wait for countdown
    if(!fu_finish(ftype,inst,inum))		// update functional unit data
	return false;				// return if skipping exec

    ///////////////////////////////////////////////////////////////////////////
    // Get source values. This is done here to minimize the code generated
    // by inlining (I don't want rmap_read() inlined too many times).
    //

    //
    // Interpret source data as various types.

    // i1/i2/i4/x8_src(ii): source operand ii read through rmap_read() and
    // converted to uchar, ushort, ulong and ullong respectively.
    fun i1_src(ii) { return rmap_read(inst.srcq[+ii],inum)?cvt(uchar); }
    fun i2_src(ii) { return rmap_read(inst.srcq[+ii],inum)?cvt(ushort); }
    fun i4_src(ii) { return rmap_read(inst.srcq[+ii],inum)?cvt(ulong); }
    fun x8_src(ii) { return rmap_read(inst.srcq[+ii],inum)?cvt(ullong); }
    // i8_src(ii): value assembled from two consecutive sources - source ii
    // shifted left by 32, OR'ed with the low 32 bits of source ii+1.
    fun i8_src(ii) {
	return ((rmap_read(inst.srcq[+ii],inum)<<32) |
		(rmap_read(inst.srcq[+(ii+1)],inum)&0xffffffff?ext(64)));
    }

    // f4_src(ii): low 32 bits of source ii, cast to float.
    fun f4_src(ii) {
	return rmap_read(inst.srcq[+ii],inum)?bits(32)?cast(float);
    }

    // f8_src(ii): sources ii (upper half) and ii+1 (lower 32 bits) combined
    // as in i8_src(), then cast to double.
    fun f8_src(ii) {
	return ((rmap_read(inst.srcq[+(ii+1)],inum)&0xffffffff?ext(64)) |
		(rmap_read(inst.srcq[+ii],inum)<<32))?cast(double);
    }

    // cc_src(ii): source operand ii converted to the cc type.
    fun cc_src(ii) { return rmap_read(inst.srcq[+ii],inum)?cvt(cc); }

    // Abbreviation for writing instruction result.
    fun dest(outnum,value) { rmap_write(outnum,value,inum); }
    // ddest(): write a 64-bit value as two results - bits 32..63 go to
    // output outnum, the low 32 bits go to output outnum+1.
    fun ddest(outnum,value) {
	rmap_write(outnum,value?cast(unsigned[64])?bits(32,63),inum);
	rmap_write(outnum+1,value?cast(unsigned[64])?bits(32),inum);
    }

    // Call extern system call handler.
    // Trap number 3 is handled locally: the register windows are flushed
    // and CANSAVE / CANRESTORE are reset.  Every other trap number is
    // passed to trap_sparc() with 256 added.  is_trap is cleared either way.
    fun trap(tnum) {
	if(tnum == 3) {
	    flush_windows(CWP,CANRESTORE);
	    CANSAVE = (NWINDOWS - 2)?cvt(cwp_t);
	    CANRESTORE = 0?cvt(cwp_t);
	} else {
	    trap_sparc(tnum+256,CWP,CANRESTORE);
	}
	is_trap = false;
    }

    ///////////////////////////////////////////////////////////////////////////
    // Do instruction operations:
    //

    // Scratch condition-code variable; it is handed to ?cc(ccr) and to
    // u_div32()/s_div32() below and read back afterwards.
    // NOTE(review): it has no initializer, yet OP_UDIVCC / OP_SDIVCC pass
    // it on to get_div_mul_cc() even when the divide helper did not write
    // it (no overflow) - confirm what its initial value is.
    val ccr;

    // NOTE(review): no case below ends in a break; each case is presumably
    // self-contained (no fall-through) - confirm against the Facile
    // switch semantics.
    switch(inst.op?cvt(ulong)) {

     case OP_NOP:	;

	//
	// branch/call instructions
	//
	// Each branch re-evaluates its condition and compares the outcome
	// with the direction recorded in inst.taken; a mismatch sets
	// rollback.

     case OP_BRZ:	if(_z(x8_src(0)) != inst.taken) rollback = true;
     case OP_BRLEZ:	if(_lez(x8_src(0)) != inst.taken) rollback = true;
     case OP_BRLZ:	if(_lz(x8_src(0)) != inst.taken) rollback = true;
     case OP_BRNZ:	if(_nz(x8_src(0)) != inst.taken) rollback = true;
     case OP_BRGZ:	if(_gz(x8_src(0)) != inst.taken) rollback = true;
     case OP_BRGEZ:	if(_gez(x8_src(0)) != inst.taken) rollback = true;

     case OP_FBU:	if(f_u(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBG:	if(f_g(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBUG:	if(f_ug(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBL:	if(f_l(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBUL:	if(f_ul(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBLG:	if(f_lg(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBNE:	if(f_ne(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBE:	if(f_e(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBUE:	if(f_ue(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBGE:	if(f_ge(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBUGE:	if(f_uge(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBLE:	if(f_le(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBULE:	if(f_ule(cc_src(0)) != inst.taken) rollback = true;
     case OP_FBO:	if(f_o(cc_src(0)) != inst.taken) rollback = true;

     case OP_BNE:	if(i_ne(cc_src(0)) != inst.taken) rollback = true;
     case OP_BE:	if(i_e(cc_src(0)) != inst.taken) rollback = true;
     case OP_BG:	if(i_g(cc_src(0)) != inst.taken) rollback = true;
     case OP_BLE:	if(i_le(cc_src(0)) != inst.taken) rollback = true;
     case OP_BGE:	if(i_ge(cc_src(0)) != inst.taken) rollback = true;
     case OP_BL:	if(i_l(cc_src(0)) != inst.taken) rollback = true;
     case OP_BGU:	if(i_gu(cc_src(0)) != inst.taken) rollback = true;
     case OP_BLEU:	if(i_leu(cc_src(0)) != inst.taken) rollback = true;
     case OP_BCC:	if(i_cc(cc_src(0)) != inst.taken) rollback = true;
     case OP_BCS:	if(i_cs(cc_src(0)) != inst.taken) rollback = true;
     case OP_BPOS:	if(i_pos(cc_src(0)) != inst.taken) rollback = true;
     case OP_BNEG:	if(i_neg(cc_src(0)) != inst.taken) rollback = true;
     case OP_BVC:	if(i_vc(cc_src(0)) != inst.taken) rollback = true;
     case OP_BVS:	if(i_vs(cc_src(0)) != inst.taken) rollback = true;

	//
	// Conditional moves
	//
	// Source 0 is the condition (condition codes or a register value);
	// source 1 is written to result 0 only when the condition holds.

     case OP_MOVA:	dest(0,x8_src(0));

     case OP_MOVNE:	if(i_ne(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVE:	if(i_e(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVG:	if(i_g(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVLE:	if(i_le(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVGE:	if(i_ge(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVL:	if(i_l(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVGU:	if(i_gu(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVLEU:	if(i_leu(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVCC:	if(i_cc(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVCS:	if(i_cs(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVPOS:	if(i_pos(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVNEG:	if(i_neg(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVVC:	if(i_vc(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVVS:	if(i_vs(cc_src(0))) dest(0,x8_src(1));

     case OP_MOVFU:	if(f_u(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFG:	if(f_g(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFUG:	if(f_ug(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFL:	if(f_l(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFUL:	if(f_ul(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFLG:	if(f_lg(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFNE:	if(f_ne(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFE:	if(f_e(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFUE:	if(f_ue(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFGE:	if(f_ge(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFUGE:	if(f_uge(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFLE:	if(f_le(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFULE:	if(f_ule(cc_src(0))) dest(0,x8_src(1));
     case OP_MOVFO:	if(f_o(cc_src(0))) dest(0,x8_src(1));

     case OP_MOVRZ:	if(_z(x8_src(0))) dest(0,x8_src(1));
     case OP_MOVRLEZ:	if(_lez(x8_src(0))) dest(0,x8_src(1));
     case OP_MOVRLZ:	if(_lz(x8_src(0))) dest(0,x8_src(1));
     case OP_MOVRNZ:	if(_nz(x8_src(0))) dest(0,x8_src(1));
     case OP_MOVRGZ:	if(_gz(x8_src(0))) dest(0,x8_src(1));
     case OP_MOVRGEZ:	if(_gez(x8_src(0))) dest(0,x8_src(1));

	//
	// System traps
	//
	// OP_TA traps unconditionally with the number in source 0; the
	// conditional forms test source 0 and take the number from source 1.

     case OP_TA:	trap(i4_src(0));
     case OP_TNE:	if(i_ne(cc_src(0))) trap(i4_src(1));
     case OP_TE:	if(i_e(cc_src(0))) trap(i4_src(1));
     case OP_TG:	if(i_g(cc_src(0))) trap(i4_src(1));
     case OP_TLE:	if(i_le(cc_src(0))) trap(i4_src(1));
     case OP_TGE:	if(i_ge(cc_src(0))) trap(i4_src(1));
     case OP_TL:	if(i_l(cc_src(0))) trap(i4_src(1));
     case OP_TGU:	if(i_gu(cc_src(0))) trap(i4_src(1));
     case OP_TLEU:	if(i_leu(cc_src(0))) trap(i4_src(1));
     case OP_TCC:	if(i_cc(cc_src(0))) trap(i4_src(1));
     case OP_TCS:	if(i_cs(cc_src(0))) trap(i4_src(1));
     case OP_TPOS:	if(i_pos(cc_src(0))) trap(i4_src(1));
     case OP_TNEG:	if(i_neg(cc_src(0))) trap(i4_src(1));
     case OP_TVC:	if(i_vc(cc_src(0))) trap(i4_src(1));
     case OP_TVS:	if(i_vs(cc_src(0))) trap(i4_src(1));

	//
	// Arithmetic ops

     case OP_ADD:	dest(0, x8_src(0) +  x8_src(1));
     case OP_SUB:	dest(0, x8_src(0) -  x8_src(1));
     case OP_AND:	dest(0, x8_src(0) &  x8_src(1));
     case OP_ANDN:	dest(0, x8_src(0) & ~x8_src(1));
     case OP_OR:	dest(0, x8_src(0) |  x8_src(1));
     case OP_ORN:	dest(0, x8_src(0) | ~x8_src(1));
     case OP_XOR:	dest(0, x8_src(0) ^  x8_src(1));
     case OP_XNOR:	dest(0, x8_src(0) ^ ~x8_src(1));

	// The ...CC forms write the value to result 0 and the low 8 bits
	// of ccr (as set through ?cc(ccr)) to result 1.
     case OP_ADDCC:	dest(0, (x8_src(0) + x8_src(1))?cc(ccr));
			dest(1, ccr?bits(8)?cvt(unsigned[64]));
     case OP_SUBCC:	dest(0, (x8_src(0) - x8_src(1))?cc(ccr));
			dest(1, ccr?bits(8)?cvt(unsigned[64]));
     case OP_ANDCC:	dest(0, (x8_src(0) & x8_src(1))?cc(ccr));
			dest(1, ccr?bits(8)?cvt(unsigned[64]));
     case OP_ANDNCC:	dest(0, (x8_src(0) & ~x8_src(1))?cc(ccr));
			dest(1, ccr?bits(8)?cvt(unsigned[64]));
     case OP_ORCC:	dest(0, (x8_src(0) | x8_src(1))?cc(ccr));
			dest(1, ccr?bits(8)?cvt(unsigned[64]));
     case OP_ORNCC:	dest(0, (x8_src(0) | ~x8_src(1))?cc(ccr));
			dest(1, ccr?bits(8)?cvt(unsigned[64]));
     case OP_XORCC:	dest(0, (x8_src(0) ^ x8_src(1))?cc(ccr));
			dest(1, ccr?bits(8)?cvt(unsigned[64]));
     case OP_XNORCC:	dest(0, (x8_src(0) ^ ~x8_src(1))?cc(ccr));
			dest(1, ccr?bits(8)?cvt(unsigned[64]));

	// Add/subtract with carry: bit 0 of the condition codes in source 2
	// is the incoming carry.
     case OP_ADDC:
	dest(0, x8_src(0) + x8_src(1) + cc_src(2)?bit(0)?ext(64));

     case OP_SUBC:
	dest(0, x8_src(0) - x8_src(1) - cc_src(2)?bit(0)?ext(64));

	// The operation is done in two steps (operands first, then the
	// carry), each with its own condition codes (ccr2, ccr).  The final
	// codes take ccr with bits 1 and 5 masked out, OR in bits 0 and 4
	// of ccr2, and recompute bits 1 and 5 from the sign bits (31 and
	// 63) of the operands and the result.
     case OP_ADDCCC:
	val ccr2 = 0x00?cvt(cc);
	val x1 = x8_src(0); val x2 = x8_src(1);
	val xx = ((x1 + x2)?cc(ccr2) + cc_src(2)?bit(0)?ext(64))?cc(ccr);
	dest(0, xx);
	dest(1, ((ccr?bits(8) & 0b11011101) | (ccr2?bits(8) & 0x11) |
		 (((x1?bit(31)==x2?bit(31)) &&
		   (xx?bit(31)!=x1?bit(31)))?ext(8)<<1) |
		 (((x1?bit(63)==x2?bit(63)) &&
		   (xx?bit(63)!=x1?bit(63)))?ext(8)<<5))
	     ? cvt(unsigned[64]));

     case OP_SUBCCC:
	val ccr2 = 0x00?cvt(cc);
	val x1 = x8_src(0); val x2 = x8_src(1);
	val xx = ((x1 - x2)?cc(ccr2) - cc_src(2)?bit(0)?ext(64))?cc(ccr);
	dest(0, xx);
	dest(1, ((ccr?bits(8) & 0b11011101) | (ccr2?bits(8) & 0x11) |
		 (((x1?bit(31)!=x2?bit(31)) &&
		   (xx?bit(31)!=x1?bit(31)))?ext(8)<<1) |
		 (((x1?bit(63)!=x2?bit(63)) &&
		   (xx?bit(63)!=x1?bit(63)))?ext(8)<<5))
	     ? cvt(unsigned[64]));

	// Unary + converts an operand to signed, selecting signed division
	// (and, further down, arithmetic right shift).
     case OP_MULX:	dest(0, x8_src(0) * x8_src(1));
     case OP_UDIVX:	dest(0, x8_src(0) / x8_src(1));
     case OP_SDIVX:	dest(0, (+x8_src(0) / +x8_src(1))
			     ? cast(unsigned[64]));

	// 32-bit shifts use a 5-bit shift count, the ...X forms a 6-bit one.
     case OP_SLL:	dest(0, x8_src(0) << x8_src(1)?bits(5));
     case OP_SRL:	dest(0, i4_src(0) >> x8_src(1)?bits(5));
     case OP_SRA:	dest(0, (+i4_src(0) >> x8_src(1)?bits(5))
			      ? sext(64));
     case OP_SLLX:	dest(0, x8_src(0) << x8_src(1)?bits(6));
     case OP_SRLX:	dest(0, x8_src(0) >> x8_src(1)?bits(6));
     case OP_SRAX:	dest(0, (+x8_src(0) >> x8_src(1)?bits(6))
			      ? cast(unsigned[64]));

	// Tagged add/subtract: bit 1 (V) is additionally set when either
	// operand has a non-zero value in its low two bits.  The ...TV
	// forms also raise TAG_OVERFLOW when V ends up set.
     case OP_TADDCC:
	val x1 = x8_src(0); val x2 = x8_src(1);
	dest(0, (x1 + x2)?cc(ccr));
	if(x1?bits(2)!=0b00 || x2?bits(2)!=0b00)
	    ccr = (ccr?bits(8) | 0x02)?cvt(cc);
	dest(1, ccr?bits(8)?cvt(unsigned[64]));

     case OP_TADDCCTV:
	val x1 = x8_src(0); val x2 = x8_src(1);
	dest(0, (x1 + x2)?cc(ccr));
	if(x1?bits(2)!=0b00 || x2?bits(2)!=0b00)
	    ccr = (ccr?bits(8) | 0x02)?cvt(cc);
	dest(1, ccr?bits(8)?cvt(unsigned[64]));
	if(ccr?bit(1)) trap(TAG_OVERFLOW);

     case OP_TSUBCC:
	val x1 = x8_src(0); val x2 = x8_src(1);
	dest(0, (x1 - x2)?cc(ccr));
	if(x1?bits(2)!=0b00 || x2?bits(2)!=0b00)
	    ccr = (ccr?bits(8) | 0x02)?cvt(cc);
	dest(1, ccr?bits(8)?cvt(unsigned[64]));

     case OP_TSUBCCTV:
	val x1 = x8_src(0); val x2 = x8_src(1);
	dest(0, (x1 - x2)?cc(ccr));
	if(x1?bits(2)!=0b00 || x2?bits(2)!=0b00)
	    ccr = (ccr?bits(8) | 0x02)?cvt(cc);
	dest(1, ccr?bits(8)?cvt(unsigned[64]));
	if(ccr?bit(1)) trap(TAG_OVERFLOW);

	// 32-bit divides: the dividend is source 0 shifted left by 32,
	// OR'ed with source 1 masked by (-1)?ext(64); source 2 is the
	// divisor.
     case OP_UDIV:
	val xx = (x8_src(0)<<32) | (x8_src(1)&((-1)?ext(64)));
	dest(0, u_div32(xx,x8_src(2),ccr));

     case OP_SDIV:
	val xx = (x8_src(0)<<32) | (x8_src(1)&((-1)?ext(64)));
	dest(0, s_div32(xx,x8_src(2),ccr));

	// 32-bit multiplies: result 0 receives the product shifted right
	// by 32, result 1 the low 32 bits sign-extended to 64.
     case OP_UMUL:
	val xx = (x8_src(0)&((-1)?ext(64))) * (x8_src(1)&((-1)?ext(64)));
	dest(0, xx >> 32); dest(1, xx?bits(32)?sext(64));

     case OP_SMUL:
	val xx = +x8_src(0)?bits(32)?sext(64) * +x8_src(1)?bits(32)?sext(64);
	dest(0, xx >> 32); dest(1, xx?bits(32)?sext(64));

     case OP_UDIVCC:
	val xx = (x8_src(0)<<32) | (x8_src(1)&((-1)?ext(64)));
	xx = u_div32(xx,x8_src(2),ccr); dest(0, xx);
	dest(1, get_div_mul_cc(xx,ccr));

     case OP_SDIVCC:
	val xx = (x8_src(0)<<32) | (x8_src(1)&((-1)?ext(64)));
	xx = s_div32(xx,x8_src(2),ccr); dest(0, xx);
	dest(1, get_div_mul_cc(xx,ccr));

	// Multiply step.  Result 0 is source 0 shifted right by one with
	// bit 0 of source 1 moved into bit 31.  Source 1 is shifted right
	// by one with (bit 3 xor bit 1) of the incoming condition codes
	// (source 3) as its new bit 31; source 2 is added to that only
	// when bit 0 of source 0 was set.
     case OP_MULSCC:
	val y0 = x8_src(0)?bit(0); ccr = cc_src(3);
	dest(0, (x8_src(0)>>1) | ((x8_src(1)&1?ext(64))<<31));
	val xx = (i4_src(1)>>1) | ((ccr?bit(3)^ccr?bit(1))?ext(32)<<31);
	if(y0) dest(1, (xx + i4_src(2)?bits(32))?cc(ccr));
	else dest(1, (xx + 0)?cc(ccr));
	dest(2, ccr?bits(8));

	// Population count: add up the 64 individual bits of source 0.
     case OP_POPC:
	val xx = x8_src(0);
	val ii=0; val count=0?ext(64);
	while(ii < 64) {
	    count = count + ((xx >> ii) & 0x1?ext(64));
	    ii = ii + 1;
	}
	dest(0, count);

     case OP_FLUSH:	cache_flush();
     case OP_FLUSHW:	trap(3);

     case OP_RD:	dest(0, x8_src(0));
     case OP_WR:	dest(0, x8_src(0) ^ x8_src(1));

//
// Floating-point arithmetic
//
// Single-precision operands occupy one source slot (f4_src), double-
// precision operands two consecutive slots (f8_src); hence the second
// double operand is read from slot 2.

     case OP_FADDS:	 dest(0, f4_src(0) + f4_src(1));
     case OP_FADDD:	ddest(0, f8_src(0) + f8_src(2));
     case OP_FSUBS:	 dest(0, f4_src(0) - f4_src(1));
     case OP_FSUBD:	ddest(0, f8_src(0) - f8_src(2));

	// Compares take the condition codes of the difference.  The E
	// variants additionally raise INVALID when the result is unordered.
     case OP_FCMPS:	(f4_src(0) - f4_src(1))?cc(ccr); dest(0, ccr?bits(8));
     case OP_FCMPD:	(f8_src(0) - f8_src(2))?cc(ccr); dest(0, ccr?bits(8));
     case OP_FCMPES:	(f4_src(0) - f4_src(1))?cc(ccr); dest(0, ccr?bits(8));
			if(f_u(ccr)) trap(INVALID);
     case OP_FCMPED:	(f8_src(0) - f8_src(2))?cc(ccr); dest(0, ccr?bits(8));
			if(f_u(ccr)) trap(INVALID);

     case OP_FSTOX:	ddest(0, f4_src(0)?cvt(signed[64]));
     case OP_FDTOX:	ddest(0, f8_src(0)?cvt(signed[64]));
     case OP_FSTOI:	 dest(0, f4_src(0)?cvt(signed[32]));
     case OP_FDTOI:	 dest(0, f8_src(0)?cvt(signed[32]));
     case OP_FSTOD:	ddest(0, f4_src(0)?cvt(double));
     case OP_FXTOS:	 dest(0, i8_src(0)?cast(signed[_])?cvt(float));
     case OP_FXTOD:	ddest(0, i8_src(0)?cast(signed[_])?cvt(double));
     case OP_FITOS:	 dest(0, i4_src(0)?cast(signed[_])?cvt(float));
     case OP_FITOD:	ddest(0, i4_src(0)?cast(signed[_])?cvt(double));
     case OP_FDTOS:	 dest(0, f8_src(0)?cvt(float));

     case OP_FMOVS:	 dest(0, f4_src(0));
     case OP_FMOVD:	ddest(0, f8_src(0));
     case OP_FNEGS:	 dest(0, -f4_src(0));
     case OP_FNEGD:	ddest(0, -f8_src(0));
	// NOTE(review): the absolute value negates only when the operand
	// compares below zero, so an operand for which that comparison is
	// false (presumably -0.0 and NaN) keeps its sign bit - confirm this
	// is acceptable.
     case OP_FABSS:	if(f4_src(0) < 0?cvt(float)) dest(0, -f4_src(0));
			else dest(0, f4_src(0));
     case OP_FABSD:	if(f8_src(0) < 0?cvt(double)) ddest(0, -f8_src(0));
			else ddest(0, f8_src(0));

     case OP_FMULS:	 dest(0, f4_src(0) * f4_src(1));
     case OP_FMULD:	ddest(0, f8_src(0) * f8_src(2));
     case OP_FDIVS:	 dest(0, f4_src(0) / f4_src(1));
     case OP_FDIVD:	ddest(0, f8_src(0) / f8_src(2));
     case OP_FSMULD:	ddest(0, f4_src(0)?cvt(double)
			      * f4_src(1)?cvt(double));

     case OP_FSQRTS:	 dest(0, sqrt(f4_src(0)));
     case OP_FSQRTD:	ddest(0, sqrt(f8_src(0)));

//
// Load, store, etc.
//
// The effective address is source 0 plus source 1; stores take the data
// from source 2.

     case OP_LDSB:	dest(0, M1s(x8_src(0) + x8_src(1)));
     case OP_LDSH:	dest(0, M2s(x8_src(0) + x8_src(1)));
     case OP_LDSW:	dest(0, M4s(x8_src(0) + x8_src(1)));
     case OP_LDUB:	dest(0, M1(x8_src(0) + x8_src(1)));
     case OP_LDUH:	dest(0, M2(x8_src(0) + x8_src(1)));
     case OP_LDUW:	dest(0, M4(x8_src(0) + x8_src(1)));
     case OP_LDX:	dest(0, M8(x8_src(0) + x8_src(1)));
	// Load doubleword: the upper half goes to result 0, the value
	// masked by (-1)?ext(64) to result 1.
     case OP_LDD:
	val xx = M8(x8_src(0) + x8_src(1));
	dest(0, xx >> 32); dest(1, xx & (-1)?ext(64));

     case OP_STB:	M1(x8_src(0) + x8_src(1), i1_src(2));
     case OP_STH:	M2(x8_src(0) + x8_src(1), i2_src(2));
     case OP_STW:	M4(x8_src(0) + x8_src(1), i4_src(2));
     case OP_STX:	M8(x8_src(0) + x8_src(1), x8_src(2));
     case OP_STD:	M8(x8_src(0) + x8_src(1), i8_src(2));

	// Compare and swap: the address is source 0 alone.  Memory is
	// overwritten with source 2 only when it matches source 1; the old
	// memory value is always written to result 0.
     case OP_CASA:
	val aa = x8_src(0); val xx = M4(aa);
	if((x8_src(1)&(-1)?ext(64)) == xx) M4(aa,i4_src(2));
	dest(0, xx);

     case OP_CASXA:
	val aa = x8_src(0); val xx = M8(aa);
	if(x8_src(1) == xx) M8(aa,x8_src(2));
	dest(0, xx);

	// Load the byte, then store 0xff to the same address.
     case OP_LDSTUB:
	val aa = x8_src(0) + x8_src(1);
	dest(0, M1(aa)); M1(aa,0xff);

	// Exchange the 32-bit memory word with source 2.
     case OP_SWAP:
	val aa = x8_src(0) + x8_src(1); val xx = M4(aa);
	M4(aa,i4_src(2)); dest(0, xx);

     case OP_LDFSR:	set_FSR4(M4(x8_src(0)+x8_src(1))?bits(32));
			is_trap = false;
     case OP_LDXFSR:	set_FSR8(M8(x8_src(0)+x8_src(1))); is_trap = false;
     case OP_STFSR:	M4(x8_src(0)+x8_src(1),get_FSR4()); is_trap = false;
     case OP_STXFSR:	M8(x8_src(0)+x8_src(1),get_FSR8()); is_trap = false;

	// SAVE / RESTORE compute source 0 + source 1 like an ADD.  The
	// register-window work (save_regs / restore_regs plus the CANSAVE /
	// CANRESTORE updates) is done only when the instruction's trap flag
	// is set.
     case OP_SAVE:
	dest(0, x8_src(0) +  x8_src(1));
	if(inst.trap) {
	    save_regs(CWP0,CANRESTORE);
	    CANSAVE = (CANRESTORE?cvt(ulong) - 1)?cvt(cwp_t);
	    CANRESTORE = 1?cvt(cwp_t);
	    is_trap = false;
	}

     case OP_RESTORE:
	dest(0, x8_src(0) +  x8_src(1));
	if(inst.trap) {
	    restore_regs(CWP0);
	    if(CANSAVE?cvt(ulong) < NWINDOWS-2)
		CANSAVE = (CANSAVE?cvt(ulong) + 1)?cvt(cwp_t);
	    is_trap = false;
	}

	// Indirect jumps: the target is source 0 + source 1.  If this is
	// the last instruction in instq only nPC is set; otherwise exactly
	// one more instruction must follow (asserted), whose npc is patched
	// and PC / nPC are redirected.
     case OP_RETRN:
	val npc = (x8_src(0) + x8_src(1))?cvt(stream)?static;
	if(inst.trap) {
	    restore_regs(CWP0);
	    if(CANSAVE?cvt(ulong) < NWINDOWS-2)
		CANSAVE = (CANSAVE?cvt(ulong) + 1)?cvt(cwp_t);
	    is_trap = false;
	}
	if(instq?length() == inum+1) nPC = npc;
	else {
	    assert(instq?length() == inum+2);
	    instq[-1].npc = npc; PC = npc;
	    if(!instq[-1].taken) nPC = PC + 4;
	}

     case OP_JMPL:
	val npc = (x8_src(0) + x8_src(1))?cvt(stream)?static;
	if(instq?length() == inum+1) nPC = npc;
	else {
	    assert(instq?length() == inum+2);
	    instq[-1].npc = npc; PC = npc;
	    if(!instq[-1].taken) nPC = PC + 4;
	}

     case OP_ILL:	no_instruction(); is_trap = false;
    }

    return true;
}

#endif