/******************************************************************************
** FILE: cache.c
** Cache and memory simulator.
*/
#include <assert.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/types.h>
#include "internal.h"
#include "fastsim.h"
#include "param.h"
/* These are the functions that are called from Facile */
extern void xcache_init();
extern void xcache_flush();
extern int xcache_load_start(ulong_t inum, ulong_t va,
int first, short *delay);
extern int xcache_load_continue(ulong_t inum, short *delay);
extern int xcache_store_start(ulong_t inum, ulong_t va,
int is_spec, short *delay);
extern int xcache_store_commit(ulong_t inum, short *delay);
extern void xcache_store_rollback(ulong_t inum);
extern void xcache_retire(ulong_t N);
/******************************************************************************
* Data types used by the cache simulator:
*/
typedef u_longlong_t alarm_t;
typedef enum EventTag {
EVENT_LOAD_L1,
EVENT_LOAD_L2,
EVENT_LOAD_MEM,
EVENT_STORE,
} EventTag_t;
typedef struct CacheEvent {
struct CacheEvent *next_alloc;
struct CacheEvent *prev_alloc;
struct CacheEvent *next_free;
EventTag_t kind;
alarm_t alarm;
void* data;
} CacheEvent_t, *CacheEvent_p;
static CacheEvent_t eventQ;
static CacheEvent_t delayedQ;
static CacheEvent_p free_events;
#define MAX_NUM_EVENTS 512
static CacheEvent_t _events[MAX_NUM_EVENTS];
static CacheEvent_p event_top();
static CacheEvent_p event_alloc(alarm_t);
static void event_free(CacheEvent_p);
static CacheEvent_p delayed_event_alloc(alarm_t);
static void add_delayed_events();
typedef struct MSHR {
struct MSHR *next_free; /* next mshr in free list */
ulong_t address; /* cache line aligned load address */
alarm_t alarm; /* time of next relevant event */
struct MSHR *mshr2; /* point from l1-mshr to l2-mshr */
ushort_t istart; /* value of istate_start (when mshr alloced) */
short inum; /* inum of load (when mshr alloced) */
uchar_t wait; /* TRUE if waiting for a store to finish */
uchar_t ref_count; /* ref-count for garbage collection */
uchar_t is_free; /* flag if this mshr not allocated */
#ifdef STATS
uchar_t num_coalesced; /* number of loads coalesced into this MSHR */
#endif
} MSHR_t, *MSHR_p;
static MSHR_t mshr_list[NUM_MSHRs];
static MSHR_p mshr_free_list;
static ulong_t mshr_num_free;
static void mshr_free(MSHR_p);
static MSHR_t mshr2_list[NUM_MSHR2s];
static MSHR_p mshr2_free_list;
static ulong_t mshr2_num_free;
static void mshr2_free(MSHR_p);
typedef struct WBuf {
struct WBuf *next_free; /* next wbuf in free list */
ulong_t address; /* wbuf aligned store address */
alarm_t alarm; /* time to next relevant event */
ushort_t istart; /* value of istate_start (when wbuf alloced) */
short inum; /* inum of store (when wbuf alloced) */
uchar_t is_free; /* flag if this wbuf not allocated */
} WBuf_t, *WBuf_p;
static WBuf_t wbuf_list[NUM_WBUFs];
static WBuf_p wbuf_free_list;
static ulong_t wbuf_num_free;
static void wbuf_free(WBuf_p);
static void finish_free(); /* finish MSHR & WBuf free operations */
typedef struct CacheEntry {
ulong_t address; /* cache line aligned address */
uchar_t is_free; /* flag if this entry not allocated */
uchar_t touch; /* flag when touched (for LRU algorithm) */
} CacheEntry_t, *CacheEntry_p;
#define L1_BANK_SIZE (L1_CACHE_SIZE/L1_CACHE_ASSOC)
#define L1_BANK_LINES (L1_BANK_SIZE/L1_LINE_WIDTH)
static CacheEntry_t l1_cache[L1_BANK_LINES][L1_CACHE_ASSOC];
static uchar_t l1_clock[L1_BANK_LINES];
#define L2_BANK_SIZE (L2_CACHE_SIZE/L2_CACHE_ASSOC)
#define L2_BANK_LINES (L2_BANK_SIZE/L2_LINE_WIDTH)
static CacheEntry_t l2_cache[L2_BANK_LINES][L2_CACHE_ASSOC];
static uchar_t l2_clock[L2_BANK_LINES];
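/* Set indexing used by the lookup/update routines below: each way (bank) is
 * indexed by line = (va % L{1,2}_BANK_SIZE) / L{1,2}_LINE_WIDTH, and entries
 * hold the line-aligned address as the tag. Hypothetical example (the real
 * values come from param.h): a 16KB, 2-way L1 with 32-byte lines gives
 * L1_BANK_SIZE = 8KB and L1_BANK_LINES = 256, so va = 0x12345678 maps to
 * line (0x12345678 % 0x2000) / 32 = 0x1678 / 32 = 179. */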
typedef union {
MSHR_p mshr;
WBuf_p wbuf;
} IState_t;
void mem_test(ulong_t va);
static int testing_va = 0;
static ulong_t va_to_test;
static int va_test_failed;
/******************************************************************************
* Static functions and global data:
*/
/* Current cycle of the pipeline simulation. (defined in Facile) */
extern u_longlong_t xCPU_cycle;
/* Last simulation cycle reached by the cache simulator. */
static u_longlong_t cache_cycle = 0;
#define NUM_ISTATES 0x100
static ulong_t istate_start = 0; /* start index for istate array */
static IState_t istate[NUM_ISTATES]; /* interface state array */
/* Compute the delay from the current cycle to a given alarm cycle. */
static inline short
alarm_to_delay(alarm_t alarm)
{ return alarm - xCPU_cycle; }
static inline alarm_t
mshr_alarm(MSHR_p mshr)
{
MSHR_p mshr2 = mshr->mshr2;
if(mshr2) return mshr2->alarm;
else return mshr->alarm;
}
static inline int
_buf_inum(int inum, ulong_t istart)
{
int ret = (inum + istart) % NUM_ISTATES - istate_start;
if(ret > NUM_ISTATES/2) return ret - NUM_ISTATES;
if(ret < -(NUM_ISTATES/2)) return ret + NUM_ISTATES;
return ret;
}
#define BUF_INUM(buf) (_buf_inum((buf)->inum,(buf)->istart))
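/* Worked example of BUF_INUM (NUM_ISTATES == 0x100 == 256); the numbers are
 * illustrative only. A buffer allocated when istate_start was 40 with inum 5
 * occupies absolute slot (5 + 40) % 256 == 45. After 8 instructions retire,
 * istate_start == 48 and BUF_INUM yields 45 - 48 == -3: the access belongs to
 * an instruction three slots before the current window start. With wraparound,
 * istart == 250 and inum == 10 give slot (10 + 250) % 256 == 4, and
 * 4 - 250 == -246 is folded up by +256 to 10. */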
static MSHR_p cache_load_start(ulong_t,int,short*,int);
static int cache_load_continue(MSHR_p,short*);
static WBuf_p cache_store_start(ulong_t,int,short*,int);
static int cache_commit(WBuf_p,short*);
static void cache_rollback(WBuf_p);
static void load_l1(MSHR_p); /* waits for conflicting stores */
static void load_l2(MSHR_p); /* process a L2 cache lookup */
static int lookup_l1(ulong_t);
static void update_l1(ulong_t);
static void flush_line_l1(ulong_t);
static int lookup_l2(ulong_t);
static void update_l2(ulong_t);
static void flush_line_l2(ulong_t);
/* Cache Statistics */
hrtime_t time_cache = 0;
static hrtime_t time_cache_start;
u_longlong_t count_cache_flush = 0;
u_longlong_t count_cache_load_start = 0;
u_longlong_t count_cache_no_mshr = 0;
u_longlong_t count_cache_load_continue = 0;
u_longlong_t count_cache_load_hit_l1 = 0;
u_longlong_t count_cache_load_hit_l2 = 0;
u_longlong_t count_cache_load_memory = 0;
u_longlong_t count_cache_load_removed = 0;
u_longlong_t count_cache_store_start = 0;
u_longlong_t count_cache_store_spec = 0;
u_longlong_t count_cache_no_wbuf = 0;
u_longlong_t count_cache_no_wbuf_spec = 0;
u_longlong_t count_cache_store_commit = 0;
u_longlong_t count_cache_store_commit_fail = 0;
u_longlong_t count_cache_store_rollback = 0;
u_longlong_t count_cache_segfault = 0;
/******************************************************************************
* Cache simulator interface functions: These functions hide the cache
* simulator's internal data structures (i.e., MSHR_t and WBuf_t) from
* Facile.
*/
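/* Illustrative sketch of how a Facile-generated pipeline might drive the
 * interface above. This is not part of the simulator; the issue/commit policy
 * shown is an assumption inferred from the return conventions of the functions
 * in this file, and it is kept inside #if 0 so it is never compiled. */
#if 0
static void
example_issue_load(ulong_t inum, ulong_t va)
{
    short delay;
    if(xcache_load_start(inum, va, /*first*/1, &delay)) {
        /* L1 hit: the load completes after CACHE_HIT_DELAY cycles. */
    } else if(delay < 0) {
        /* No MSHR is free (or the address faults): abort and reissue
         * the load roughly -delay cycles from now. */
    } else {
        /* Miss in flight: once 'delay' cycles have elapsed, poll with
         * xcache_load_continue(inum, &delay); it returns 1 when the
         * data is available and otherwise sets a new delay. */
    }
}

static void
example_issue_store(ulong_t inum, ulong_t va, int squashed)
{
    short delay;
    if(!xcache_store_start(inum, va, /*is_spec*/1, &delay))
        return; /* no write buffer free; retry after -delay cycles */
    if(squashed)
        xcache_store_rollback(inum); /* speculation failed */
    else if(!xcache_store_commit(inum, &delay)) {
        /* An earlier conflicting load is still outstanding;
         * retry the commit after 'delay' cycles. */
    }
    /* When N instructions retire, the pipeline calls xcache_retire(N)
     * to advance the interface-state window. */
}
#endif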
void
xcache_init()
{
ulong_t ii, jj;
for(ii=0; ii<MAX_NUM_EVENTS-1; ++ii)
_events[ii].next_free = &_events[ii+1];
_events[MAX_NUM_EVENTS-1].next_free = NULL;
free_events = _events;
eventQ.next_alloc = &eventQ;
eventQ.prev_alloc = &eventQ;
eventQ.alarm = 0;
delayedQ.next_alloc = &delayedQ;
delayedQ.prev_alloc = &delayedQ;
delayedQ.alarm = 0;
for(ii=0; ii<NUM_MSHRs; ++ii) {
mshr_list[ii].next_free = &mshr_list[ii+1];
mshr_list[ii].is_free = 1;
}
mshr_list[NUM_MSHRs-1].next_free = NULL;
mshr_free_list = mshr_list;
mshr_num_free = NUM_MSHRs;
for(ii=0; ii<NUM_MSHR2s; ++ii) {
mshr2_list[ii].next_free = &mshr2_list[ii+1];
mshr2_list[ii].is_free = 1;
}
mshr2_list[NUM_MSHR2s-1].next_free = NULL;
mshr2_free_list = mshr2_list;
mshr2_num_free = NUM_MSHR2s;
for(ii=0; ii<NUM_WBUFs; ++ii) {
wbuf_list[ii].next_free = &wbuf_list[ii+1];
wbuf_list[ii].is_free = 1;
}
wbuf_list[NUM_WBUFs-1].next_free = NULL;
wbuf_free_list = wbuf_list;
wbuf_num_free = NUM_WBUFs;
for(ii=0; ii<L1_BANK_LINES; ++ii) {
for(jj=0; jj<L1_CACHE_ASSOC; ++jj)
l1_cache[ii][jj].is_free = 1;
l1_clock[ii] = 0;
}
for(ii=0; ii<L2_BANK_LINES; ++ii) {
for(jj=0; jj<L2_CACHE_ASSOC; ++jj)
l2_cache[ii][jj].is_free = 1;
l2_clock[ii] = 0;
}
istate_start = 0;
}
void
xcache_flush()
{
#ifdef STATS
++count_cache_flush;
#endif
}
int
xcache_load_start(ulong_t inum, ulong_t va, int first, short *delay)
{
ulong_t index;
MSHR_p mshr;
int ret;
#ifdef STATS
++count_cache_load_start;
time_cache_start = gethrtime();
#endif
index = (inum + istate_start) % NUM_ISTATES;
mshr = cache_load_start(va,first,delay,(int)inum);
istate[index].mshr = mshr; ret = (mshr || *delay<0) ? 0 : 1;
#ifdef STATS
time_cache += gethrtime() - time_cache_start;
if(*delay < 0) ++count_cache_no_mshr;
#endif
return ret;
}
int
xcache_load_continue(ulong_t inum, short *delay)
{
ulong_t index;
int ret;
#ifdef STATS
++count_cache_load_continue;
time_cache_start = gethrtime();
#endif
index = (inum + istate_start) % NUM_ISTATES;
ret = cache_load_continue(istate[index].mshr,delay);
#ifdef STATS
time_cache += gethrtime() - time_cache_start;
#endif
return ret;
}
int
xcache_store_start(ulong_t inum, ulong_t va, int is_spec, short *delay)
{
ulong_t index;
WBuf_p wbuf;
int ret;
#ifdef STATS
++count_cache_store_start;
if(is_spec) ++count_cache_store_spec;
time_cache_start = gethrtime();
#endif
index = (inum + istate_start) % NUM_ISTATES;
wbuf = cache_store_start(va,is_spec,delay,(int)inum);
istate[index].wbuf = wbuf; ret = wbuf ? 1 : 0;
#ifdef STATS
time_cache += gethrtime() - time_cache_start;
if(!wbuf) {
++count_cache_no_wbuf;
if(is_spec) ++count_cache_no_wbuf_spec;
} else if(*delay > 0)
++count_cache_store_commit_fail;
#endif
return ret;
}
int
xcache_store_commit(ulong_t inum, short *delay)
{
ulong_t index;
int ret;
#ifdef STATS
++count_cache_store_commit;
time_cache_start = gethrtime();
#endif
index = (inum + istate_start) % NUM_ISTATES;
ret = cache_commit(istate[index].wbuf,delay);
#ifdef STATS
time_cache += gethrtime() - time_cache_start;
if(!ret) ++count_cache_store_commit_fail;
#endif
return ret;
}
void
xcache_store_rollback(ulong_t inum)
{
ulong_t index;
#ifdef STATS
++count_cache_store_rollback;
time_cache_start = gethrtime();
#endif
index = (inum + istate_start) % NUM_ISTATES;
cache_rollback(istate[index].wbuf);
#ifdef STATS
time_cache += gethrtime() - time_cache_start;
#endif
}
void
xcache_retire(ulong_t N)
{
istate_start = (istate_start + N) % NUM_ISTATES;
}
/******************************************************************************
******************************************************************************
*** The rest of these routines perform the actual cache simulation! ***
******************************************************************************
*****************************************************************************/
/* Advance cache simulation to current execution time. */
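/* Events are pulled from eventQ in alarm (cycle) order; each one either
 * retries an L1 lookup that was blocked by a store (EVENT_LOAD_L1), performs
 * the L2 lookup for an L1 miss (EVENT_LOAD_L2), completes a load from memory
 * (EVENT_LOAD_MEM), or completes a buffered store (EVENT_STORE). */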
static void
cache_advance()
{
assert(xCPU_cycle >= cache_cycle);
for(;;) {
CacheEvent_p event = event_top();
if(!event || event->alarm > xCPU_cycle)
break;
cache_cycle = event->alarm;
finish_free();
switch(event->kind) {
case EVENT_LOAD_L1: load_l1((MSHR_p)event->data); break;
case EVENT_LOAD_L2: load_l2((MSHR_p)event->data); break;
case EVENT_LOAD_MEM: {
/* just waiting to finish the load */
MSHR_p mshr = (MSHR_p)event->data;
update_l1(mshr->address);
assert(mshr->mshr2);
update_l2(mshr->address);
#ifdef STATS
count_cache_load_memory += mshr->num_coalesced;
#endif
mshr_free(mshr);
break;
}
case EVENT_STORE: {
/* just waiting to finish the store */
WBuf_p wbuf = (WBuf_p)event->data;
add_delayed_events();
wbuf_free(wbuf);
break;
}
default: assert(!"Unknown cache event!"); break;
}
event_free(event);
}
add_delayed_events();
cache_cycle = xCPU_cycle;
finish_free();
}
/******************************************************************************
* This is a never-before-seen load request.
* Find the given address in the L1 cache, else allocate an L1 MSHR
* to manage the miss through the rest of the memory hierarchy.
*/
MSHR_p
cache_load_start(ulong_t va, int first, short *delay, int inum)
{
CacheEvent_p event;
alarm_t alarm = 0;
MSHR_p mshr = NULL;
WBuf_p wbuf = NULL;
int mshr_inum;
int wbuf_inum;
int ii;
if(inum > 0) {
if(va == 0) xmem_test_failed = 1;
else {
xmem_test_flag = 1;
xmem_test_failed = 0;
xmem_test_address = (void*)va;
xmem_load8_test((void*)va);
xmem_test_flag = 0;
}
if(xmem_test_failed) {
++count_cache_segfault;
*delay = -128; return NULL;
}
}
va &= ~(L1_LINE_WIDTH-1);
/**************************************************************************
* Update cache state, advancing the simulation up
* to the current execution time (i.e., CPU_cycle). */
cache_advance();
/* Conservatively avoid conflicts with data in the write-buffer.
* Don't allow any conflicting loads until prior writes complete. */
alarm = xCPU_cycle + 1;
wbuf_inum = -NUM_ISTATES;
for(ii=0; ii<NUM_WBUFs; ++ii) {
WBuf_p _wbuf = &wbuf_list[ii];
int _wbuf_inum = BUF_INUM(_wbuf);
if(_wbuf->is_free) continue;
if((_wbuf->address & ~(L1_LINE_WIDTH-1)) == va && _wbuf_inum <= inum) {
if(_wbuf->alarm > alarm) alarm = _wbuf->alarm;
if(_wbuf_inum > wbuf_inum) {
wbuf_inum = _wbuf_inum;
wbuf = _wbuf;
}
}
}
/* Lookup address in the L1 cache. */
if(!wbuf && lookup_l1(va)) {
#ifdef STATS
++count_cache_load_hit_l1;
#endif
*delay = CACHE_HIT_DELAY;
return NULL;
}
/* L1 cache miss: First try coalescing this load
* with a prior load request. */
for(ii=0; ii<NUM_MSHRs; ++ii) {
mshr = &mshr_list[ii];
mshr_inum = BUF_INUM(mshr);
if(mshr->is_free || (wbuf && mshr_inum < wbuf_inum))
continue;
if(mshr->address == va && mshr_inum <= inum) {
*delay = alarm_to_delay(mshr_alarm(mshr));
#ifdef STATS
++mshr->num_coalesced;
#endif
return mshr;
}
}
/* Test if we can allocate a new L1 MSHR. Requests with 'first' clear keep
* one MSHR in reserve, so a first-time load request can always proceed. */
if(mshr_num_free < 1 || (!first && mshr_num_free <= 1)) {
/* Cannot allocate an MSHR for the L1 cache;
* Return the minimum cycle delay until one becomes free. */
alarm = ~(alarm_t)0;
for(ii=0; ii<NUM_MSHRs; ++ii) {
mshr = &mshr_list[ii];
if(mshr->is_free) continue;
if(mshr_alarm(mshr) < alarm)
alarm = mshr_alarm(mshr);
}
/* delay returned negative => operation aborted */
*delay = -alarm_to_delay(alarm);
return NULL;
}
if(!wbuf) alarm = xCPU_cycle + L1_MISS_DELAY;
/* Allocate new L1 MSHR. */
mshr = mshr_free_list; mshr_free_list = mshr->next_free;
mshr->is_free = 0; mshr->ref_count = 1; --mshr_num_free;
mshr->address = va & ~(L1_LINE_WIDTH-1);
mshr->alarm = alarm;
mshr->mshr2 = NULL;
mshr->istart = istate_start;
mshr->inum = inum;
mshr->wait = wbuf ? 1 : 0;
#ifdef STATS
mshr->num_coalesced = 1;
#endif
/* Add a cache event to continue processing this load request. */
event = event_alloc(alarm);
event->kind = wbuf ? EVENT_LOAD_L1 : EVENT_LOAD_L2;
event->data = mshr;
/* Set the delay for an L1 cache miss,
* and return the corresponding MSHR. */
*delay = alarm_to_delay(alarm);
return mshr;
}
/******************************************************************************
* Delay has expired on an existing load; Advance cache simulation.
*/
int
cache_load_continue(MSHR_p mshr, short *delay)
{
CacheEvent_p event;
alarm_t alarm;
WBuf_p wbuf;
/**************************************************************************
* Update cache state, advancing the simulation up
* to the current execution time (i.e., CPU_cycle). */
cache_advance();
alarm = mshr_alarm(mshr);
assert(alarm >= xCPU_cycle);
if(alarm == xCPU_cycle) { /* if load is done */
*delay = CACHE_HIT_DELAY;
return 1;
}
/* Load still in progress */
*delay = alarm_to_delay(alarm);
return 0;
}
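/* Handle an EVENT_LOAD_L1 event: the load was blocked by one or more
 * conflicting stores. Re-scan the write buffers; if a conflict remains,
 * reschedule this event, otherwise look up the L1 cache and either finish
 * the load (hit) or schedule the L2 lookup (miss). */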
void
load_l1(MSHR_p mshr)
{
CacheEvent_p event;
WBuf_p wbuf = NULL;
alarm_t alarm = 0;
int wbuf_inum;
int mshr_inum;
int retry = 0;
int ii;
/* Verify that all conflicting stores have completed. */
alarm = xCPU_cycle;
wbuf_inum = -NUM_ISTATES;
mshr_inum = BUF_INUM(mshr);
for(ii=0; ii<NUM_WBUFs; ++ii) {
WBuf_p _wbuf = &wbuf_list[ii];
int _wbuf_inum = BUF_INUM(_wbuf);
if(_wbuf->is_free) continue;
if((_wbuf->address & ~(L1_LINE_WIDTH-1)) == mshr->address &&
_wbuf_inum <= mshr_inum) {
if(_wbuf->alarm > alarm) alarm = _wbuf->alarm;
if(_wbuf_inum > wbuf_inum) {
wbuf_inum = _wbuf_inum;
wbuf = _wbuf;
}
}
}
if(wbuf) {
/* Still waiting for conflicting stores */
assert(!mshr->mshr2);
if(!wbuf->alarm) ++alarm;
retry = alarm == xCPU_cycle;
} else {
/* No more conflicting stores => start processing load. */
if(lookup_l1(mshr->address)) {
/* Hit in L1 cache */
#ifdef STATS
count_cache_load_hit_l1 += mshr->num_coalesced;
#endif
mshr_free(mshr);
return;
}
/* Miss in L1 cache */
alarm = xCPU_cycle + L1_MISS_DELAY;
}
/* Add a cache event to continue processing this load request. */
mshr->alarm = alarm;
mshr->wait = wbuf ? 1 : 0;
if(!retry) event = event_alloc(alarm);
else event = delayed_event_alloc(alarm);
event->kind = wbuf ? EVENT_LOAD_L1 : EVENT_LOAD_L2;
event->data = mshr;
}
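/* Handle an EVENT_LOAD_L2 event: look the line up in the L2 cache. On a hit
 * the line is installed in L1 and the load completes; on a miss the load is
 * attached to an L2 MSHR (coalescing with an earlier outstanding miss to the
 * same L2 line when possible) and an EVENT_LOAD_MEM event is scheduled. */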
void
load_l2(MSHR_p mshr)
{
CacheEvent_p event;
int mshr_inum;
MSHR_p mshr2;
int ii;
/* Lookup load address in the L2 cache.
* If found, then we are done with this load. */
if(lookup_l2(mshr->address)) {
#ifdef STATS
count_cache_load_hit_l2 += mshr->num_coalesced;
#endif
update_l1(mshr->address);
mshr_free(mshr);
return;
}
/**************************************************************************
* Load missed in the L2 cache.
*/
/* Try to assign this load to an L2 MSHR. First test if it can be
* coalesced into an existing L2 MSHR for an earlier instruction. */
mshr_inum = BUF_INUM(mshr);
for(ii=0; ii<NUM_MSHR2s; ++ii) {
mshr2 = &mshr2_list[ii];
if(mshr2->is_free) continue;
if(mshr2->address == (mshr->address & ~(L2_LINE_WIDTH-1)) &&
BUF_INUM(mshr2) <= mshr_inum) {
mshr->mshr2 = mshr2;
++mshr2->ref_count;
break;
}
}
if(!mshr->mshr2) {
/* Allocate a new L2 MSHR for this load.
* (There should always be a free L2 MSHR.) */
mshr2 = mshr2_free_list; mshr2_free_list = mshr2->next_free;
mshr2->is_free = 0; mshr2->ref_count = 1; --mshr2_num_free;
mshr2->address = mshr->address & ~(L2_LINE_WIDTH-1);
mshr2->alarm = cache_cycle + L2_MISS_DELAY;
mshr2->istart = mshr->istart;
mshr2->inum = mshr->inum;
mshr->mshr2 = mshr2;
}
/* Add a cache event to continue processing this load request. */
event = event_alloc(mshr_alarm(mshr));
event->kind = EVENT_LOAD_MEM;
event->data = mshr;
}
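/* Start a store: allocate a write buffer for the (write-line aligned)
 * address, or report how long to wait for one to free. Non-speculative
 * stores attempt to commit immediately. In-flight loads to the same L1 line
 * that appear later in the instruction stream are cancelled (see the comment
 * below), since their data would be stale. */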
WBuf_p
cache_store_start(ulong_t va, int is_spec, short *delay, int inum)
{
WBuf_p wbuf;
int ii;
va &= ~(WRITE_LINE_WIDTH-1);
/**********************************************************************
* Update cache state, advancing the simulation up
* to the current execution time (i.e., CPU_cycle). */
cache_advance();
/* Try to allocate a new write-buffer. */
if(wbuf_num_free <= 0) {
/* No free write-buffers; delay until one becomes free. */
alarm_t alarm = ~(alarm_t)0;
for(ii=0; ii<NUM_WBUFs; ++ii) {
wbuf = &wbuf_list[ii];
if(wbuf->is_free) continue;
if(wbuf->alarm > 0 && wbuf->alarm < alarm)
alarm = wbuf->alarm;
}
if(alarm == ~(alarm_t)0) *delay = -1;
else {
assert(alarm > xCPU_cycle);
*delay = -alarm_to_delay(alarm);
}
return NULL;
}
/* Allocate a new write-buffer. */
wbuf = wbuf_free_list; wbuf_free_list = wbuf->next_free;
wbuf->is_free = 0; --wbuf_num_free;
wbuf->address = va & ~(WRITE_LINE_WIDTH-1);
wbuf->istart = istate_start;
wbuf->inum = inum;
wbuf->alarm = 0;
if(is_spec) *delay = 0;
else {
ii = cache_commit(wbuf,delay);
assert(ii || *delay > 0);
}
/* This store may conflict with executing loads that appear to
* occur later in the instruction stream. This can happen if a
* load was issued speculatively, then later rolled back. If a
* conflict exists then cancel the load. */
for(ii=0; ii<NUM_MSHRs; ++ii) {
MSHR_p _mshr = &mshr_list[ii];
if(_mshr->is_free) continue;
if(_mshr->address == (va & ~(L1_LINE_WIDTH-1)) &&
BUF_INUM(_mshr) >= inum && !_mshr->wait) {
/* Remove pending events for this MSHR and free the MSHR */
CacheEvent_p event;
for(event=eventQ.next_alloc; event!=&eventQ;
event=event->next_alloc) {
switch(event->kind) {
case EVENT_LOAD_L1:
case EVENT_LOAD_L2:
case EVENT_LOAD_MEM:
if((MSHR_p)event->data == _mshr)
event_free(event);
break;
default: break;
}
}
#ifdef STATS
count_cache_load_removed += _mshr->num_coalesced;
#endif
mshr_free(_mshr);
}
}
return wbuf;
}
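/* Commit a buffered store: if an earlier load to the same L1 line is still
 * outstanding, fail and report when it will complete; otherwise schedule an
 * EVENT_STORE to model the write-out delay and succeed immediately. */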
int
cache_commit(WBuf_p wbuf, short *delay)
{
CacheEvent_p event;
MSHR_p mshr = NULL;
alarm_t alarm = 0;
int wbuf_inum;
int ii;
/**************************************************************************
* Update cache state, advancing the simulation up
* to the current execution time (i.e., CPU_cycle). */
cache_advance();
/* Make sure this store occurs after conflicting loads complete. */
wbuf_inum = BUF_INUM(wbuf);
for(ii=0; ii<NUM_MSHRs; ++ii) {
MSHR_p _mshr = &mshr_list[ii];
if(_mshr->is_free) continue;
if(_mshr->address == (wbuf->address & ~(L1_LINE_WIDTH-1)) &&
BUF_INUM(_mshr) < wbuf_inum && mshr_alarm(_mshr) > alarm) {
mshr = _mshr; alarm = mshr_alarm(_mshr);
}
}
if(mshr) {
/* Conflicting load found */
wbuf->alarm = alarm;
*delay = alarm_to_delay(alarm);
return 0;
}
/* Continue to simulate timing of store out to memory. */
alarm = xCPU_cycle + WRITE_DELAY;
wbuf->alarm = alarm;
/* Add a cache event to continue processing this store. */
event = event_alloc(alarm);
event->kind = EVENT_STORE;
event->data = wbuf;
/* No conflicts => allow commit to succeed. */
*delay = 0; return 1;
}
void
cache_rollback(WBuf_p wbuf)
{
/**************************************************************************
* Update cache state, advancing the simulation up
* to the current execution time (i.e., CPU_cycle). */
cache_advance();
/* free resources */
wbuf_free(wbuf);
}
CacheEvent_p
event_top()
{
CacheEvent_p event = eventQ.next_alloc;
if(event == &eventQ) return 0;
return event;
}
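/* The event queue is a doubly linked list kept sorted by alarm time.
 * event_alloc() inserts from the tail, so events with equal alarms run in
 * allocation order. delayed_event_alloc() stages events on a separate list
 * (delayedQ) that add_delayed_events() later merges into eventQ, which keeps
 * a retried event from being picked up again by the same cache_advance()
 * pass. */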
CacheEvent_p
event_alloc(alarm_t alarm)
{
CacheEvent_p event, ee;
assert(free_events);
event = free_events;
free_events = event->next_free;
event->alarm = alarm;
for(ee=eventQ.prev_alloc; ee!=&eventQ; ee=ee->prev_alloc)
if(alarm >= ee->alarm) break;
event->next_alloc = ee->next_alloc; event->prev_alloc = ee;
event->next_alloc->prev_alloc = ee->next_alloc = event;
return event;
}
void
event_free(CacheEvent_p event)
{
event->next_alloc->prev_alloc = event->prev_alloc;
event->prev_alloc->next_alloc = event->next_alloc;
event->next_free = free_events;
free_events = event;
}
CacheEvent_p
delayed_event_alloc(alarm_t alarm)
{
CacheEvent_p event, ee;
assert(free_events);
event = free_events;
free_events = event->next_free;
event->alarm = alarm;
for(ee=delayedQ.prev_alloc; ee!=&delayedQ; ee=ee->prev_alloc)
if(alarm >= ee->alarm) break;
event->next_alloc = ee->next_alloc; event->prev_alloc = ee;
event->next_alloc->prev_alloc = ee->next_alloc = event;
return event;
}
void
add_delayed_events()
{
CacheEvent_p event, next, ee;
for(event=delayedQ.next_alloc; event!=&delayedQ; event=next) {
next = event->next_alloc;
for(ee=eventQ.prev_alloc; ee!=&eventQ; ee=ee->prev_alloc)
if(event->alarm >= ee->alarm) break;
event->next_alloc = ee->next_alloc; event->prev_alloc = ee;
event->next_alloc->prev_alloc = ee->next_alloc = event;
}
delayedQ.next_alloc = delayedQ.prev_alloc = &delayedQ;
}
static ulong_t wbuf_num_to_free = 0;
static ulong_t mshr_num_to_free = 0;
static ulong_t mshr2_num_to_free = 0;
static WBuf_p wbuf_to_free_list = NULL;
static MSHR_p mshr_to_free_list = NULL;
static MSHR_p mshr2_to_free_list = NULL;
static WBuf_p *wbuf_to_free_list_end = &wbuf_to_free_list;
static MSHR_p *mshr_to_free_list_end = &mshr_to_free_list;
static MSHR_p *mshr2_to_free_list_end = &mshr2_to_free_list;
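/* Freeing is deferred: mshr_free(), mshr2_free() and wbuf_free() only mark
 * the buffer free and queue it on a "to free" list; finish_free() splices
 * those lists back onto the real free lists once the cache cycle advances,
 * apparently so that a buffer released in the current cycle is not
 * reallocated within that same cycle. */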
void
mshr_free(MSHR_p mshr)
{
assert(!mshr->is_free && mshr->ref_count > 0);
if(--mshr->ref_count > 0) return;
if(mshr->mshr2) mshr2_free(mshr->mshr2);
++mshr_num_to_free;
*mshr_to_free_list_end = mshr;
mshr_to_free_list_end = &mshr->next_free;
mshr->next_free = NULL;
mshr->is_free = 1;
}
void
mshr2_free(MSHR_p mshr)
{
assert(!mshr->is_free && mshr->ref_count > 0);
if(--mshr->ref_count > 0) return;
++mshr2_num_to_free;
*mshr2_to_free_list_end = mshr;
mshr2_to_free_list_end = &mshr->next_free;
mshr->next_free = NULL;
mshr->is_free = 1;
}
void
wbuf_free(WBuf_p wbuf)
{
assert(!wbuf->is_free);
++wbuf_num_to_free;
*wbuf_to_free_list_end = wbuf;
wbuf_to_free_list_end = &wbuf->next_free;
wbuf->next_free = NULL;
wbuf->is_free = 1;
}
void
finish_free()
{
static u_longlong_t last_free_cycle = 0;
if(cache_cycle <= last_free_cycle) return;
last_free_cycle = cache_cycle;
*mshr_to_free_list_end = mshr_free_list;
mshr_to_free_list_end = &mshr_to_free_list;
mshr_free_list = mshr_to_free_list;
mshr_num_free += mshr_num_to_free;
mshr_num_to_free = 0;
*mshr2_to_free_list_end = mshr2_free_list;
mshr2_to_free_list_end = &mshr2_to_free_list;
mshr2_free_list = mshr2_to_free_list;
mshr2_num_free += mshr2_num_to_free;
mshr2_num_to_free = 0;
*wbuf_to_free_list_end = wbuf_free_list;
wbuf_to_free_list_end = &wbuf_to_free_list;
wbuf_free_list = wbuf_to_free_list;
wbuf_num_free += wbuf_num_to_free;
wbuf_num_to_free = 0;
}
/******************************************************************************
* The actual cache data structures and access functions.
*/
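/* Replacement in both caches is a clock (second-chance) approximation of
 * LRU: lookups set a per-entry touch bit, and update_l1()/update_l2()
 * advance a per-set clock hand, clearing touch bits until an untouched or
 * free way is found to receive the new line. */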
int
lookup_l1(ulong_t va)
{
ulong_t line = (va % L1_BANK_SIZE) / L1_LINE_WIDTH;
int ii;
va &= ~(L1_LINE_WIDTH-1);
for(ii=0; ii<L1_CACHE_ASSOC; ++ii) {
if(!l1_cache[line][ii].is_free &&
l1_cache[line][ii].address == va) {
l1_cache[line][ii].touch = 1;
lookup_l2(va);
return 1;
}
}
return 0;
}
void
update_l1(ulong_t va)
{
ulong_t line = (va % L1_BANK_SIZE) / L1_LINE_WIDTH;
int ii;
va &= ~(L1_LINE_WIDTH-1);
for(ii=0; ii<L1_CACHE_ASSOC; ++ii) {
if(!l1_cache[line][ii].is_free &&
l1_cache[line][ii].address == va) {
return;
}
}
for(ii = l1_clock[line];
!l1_cache[line][ii].is_free && l1_cache[line][ii].touch;
ii = (ii+1) % L1_CACHE_ASSOC) {
l1_cache[line][ii].touch = 0;
}
l1_cache[line][ii].address = va;
l1_cache[line][ii].is_free = 0;
l1_cache[line][ii].touch = 0;
l1_clock[line] = (ii+1) % L1_CACHE_ASSOC;
}
void
flush_line_l1(ulong_t va)
{
ulong_t line = (va % L1_BANK_SIZE) / L1_LINE_WIDTH;
int ii;
va &= ~(L1_LINE_WIDTH-1);
for(ii=0; ii<L1_CACHE_ASSOC; ++ii) {
if(!l1_cache[line][ii].is_free &&
l1_cache[line][ii].address == va) {
l1_cache[line][ii].is_free = 1;
return;
}
}
}
int
lookup_l2(ulong_t va)
{
ulong_t line = (va % L2_BANK_SIZE) / L2_LINE_WIDTH;
int ii;
va &= ~(L2_LINE_WIDTH-1);
for(ii=0; ii<L2_CACHE_ASSOC; ++ii) {
if(!l2_cache[line][ii].is_free &&
l2_cache[line][ii].address == va) {
l2_cache[line][ii].touch = 1;
return 1;
}
}
return 0;
}
void
update_l2(ulong_t va)
{
ulong_t line = (va % L2_BANK_SIZE) / L2_LINE_WIDTH;
int ii;
va &= ~(L2_LINE_WIDTH-1);
for(ii=0; ii<L2_CACHE_ASSOC; ++ii) {
if(!l2_cache[line][ii].is_free &&
l2_cache[line][ii].address == va) {
return;
}
}
for(ii = l2_clock[line];
!l2_cache[line][ii].is_free && l2_cache[line][ii].touch;
ii = (ii+1) % L2_CACHE_ASSOC) {
l2_cache[line][ii].touch = 0;
}
l2_cache[line][ii].address = va;
l2_cache[line][ii].is_free = 0;
l2_cache[line][ii].touch = 0;
l2_clock[line] = (ii+1) % L2_CACHE_ASSOC;
}
void
flush_line_l2(ulong_t va)
{
ulong_t line = (va % L2_BANK_SIZE) / L2_LINE_WIDTH;
int ii;
va &= ~(L2_LINE_WIDTH-1);
for(ii=0; ii<L2_CACHE_ASSOC; ++ii) {
if(!l2_cache[line][ii].is_free &&
l2_cache[line][ii].address == va) {
l2_cache[line][ii].is_free = 1;
return;
}
}
}