/******************************************************************************
** FILE: cache.c
** Cache and memory simulator.
*/
#include <assert.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/types.h>

#include "internal.h"
#include "fastsim.h"
#include "param.h"

/* These are the functions that are called from Facile */
extern void xcache_init();
extern void xcache_flush();
extern int  xcache_load_start(ulong_t inum, ulong_t va, int first, short *delay);
extern int  xcache_load_continue(ulong_t inum, short *delay);
extern int  xcache_store_start(ulong_t inum, ulong_t va, int is_spec, short *delay);
extern int  xcache_store_commit(ulong_t inum, short *delay);
extern void xcache_store_rollback(ulong_t inum);
extern void xcache_retire(ulong_t N);

/******************************************************************************
 * Data types used by the cache simulator:
 */
typedef u_longlong_t alarm_t;

typedef enum EventTag {
    EVENT_LOAD_L1,
    EVENT_LOAD_L2,
    EVENT_LOAD_MEM,
    EVENT_STORE,
} EventTag_t;

typedef struct CacheEvent {
    struct CacheEvent *next_alloc;
    struct CacheEvent *prev_alloc;
    struct CacheEvent *next_free;
    EventTag_t kind;
    alarm_t alarm;
    void *data;
} CacheEvent_t, *CacheEvent_p;

static CacheEvent_t eventQ;
static CacheEvent_t delayedQ;
static CacheEvent_p free_events;

#define MAX_NUM_EVENTS 512
static CacheEvent_t _events[MAX_NUM_EVENTS];

static CacheEvent_p event_top();
static CacheEvent_p event_alloc(alarm_t);
static void event_free(CacheEvent_p);
static CacheEvent_p delayed_event_alloc(alarm_t);
static void add_delayed_events();

typedef struct MSHR {
    struct MSHR *next_free;     /* next mshr in free list */
    ulong_t address;            /* cache line aligned load address */
    alarm_t alarm;              /* time of next relevant event */
    struct MSHR *mshr2;         /* point from l1-mshr to l2-mshr */
    ushort_t istart;            /* value of istate start (when mshr alloced) */
    short inum;                 /* inum of load (when mshr alloced) */
    uchar_t wait;               /* TRUE if waiting for a store to finish */
    uchar_t ref_count;          /* ref-count for garbage collection */
    uchar_t is_free;            /* flag if this mshr not allocated */
#ifdef STATS
    uchar_t num_coalesced;      /* number of loads coalesced into this MSHR */
#endif
} MSHR_t, *MSHR_p;

static MSHR_t mshr_list[NUM_MSHRs];
static MSHR_p mshr_free_list;
static ulong_t mshr_num_free;
static void mshr_free(MSHR_p);

static MSHR_t mshr2_list[NUM_MSHR2s];
static MSHR_p mshr2_free_list;
static ulong_t mshr2_num_free;
static void mshr2_free(MSHR_p);

typedef struct WBuf {
    struct WBuf *next_free;     /* next wbuf in free list */
    ulong_t address;            /* wbuf aligned store address */
    alarm_t alarm;              /* time to next relevant event */
    ushort_t istart;            /* value of istate start (when wbuf alloced) */
    short inum;                 /* inum of store (when wbuf alloced) */
    uchar_t is_free;            /* flag if this wbuf not allocated */
} WBuf_t, *WBuf_p;

static WBuf_t wbuf_list[NUM_WBUFs];
static WBuf_p wbuf_free_list;
static ulong_t wbuf_num_free;
static void wbuf_free(WBuf_p);

static void finish_free();      /* finish MSHR & WBuf free operations */

typedef struct CacheEntry {
    ulong_t address;            /* cache line aligned address */
    uchar_t is_free;            /* flag if this entry not allocated */
    uchar_t touch;              /* flag when touched (for LRU algorithm) */
} CacheEntry_t, *CacheEntry_p;

#define L1_BANK_SIZE (L1_CACHE_SIZE/L1_CACHE_ASSOC)
#define L1_BANK_LINES (L1_BANK_SIZE/L1_LINE_WIDTH)
static CacheEntry_t l1_cache[L1_BANK_LINES][L1_CACHE_ASSOC];
static uchar_t l1_clock[L1_BANK_LINES];
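/*
 * Note on the tag-array geometry above (illustrative only; the real values
 * come from param.h): each "bank" is one way of the cache, so with
 * hypothetical parameters L1_CACHE_SIZE = 32768, L1_CACHE_ASSOC = 2 and
 * L1_LINE_WIDTH = 32, L1_BANK_SIZE = 16384 and L1_BANK_LINES = 512, making
 * l1_cache a 512 x 2 array of tags.  lookup_l1() selects the row with
 * (va % L1_BANK_SIZE) / L1_LINE_WIDTH and scans the L1_CACHE_ASSOC ways.
 * The L2 arrays below follow the same pattern.
 */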
#define L2_BANK_SIZE (L2_CACHE_SIZE/L2_CACHE_ASSOC)
#define L2_BANK_LINES (L2_BANK_SIZE/L2_LINE_WIDTH)
static CacheEntry_t l2_cache[L2_BANK_LINES][L2_CACHE_ASSOC];
static uchar_t l2_clock[L2_BANK_LINES];

typedef union {
    MSHR_p mshr;
    WBuf_p wbuf;
} IState_t;

void mem_test(ulong_t va);
static int testing_va = 0;
static ulong_t va_to_test;
static int va_test_failed;

/******************************************************************************
 * Static functions and global data:
 */

/* Current cycle of the pipeline simulation. (defined in Facile) */
extern u_longlong_t xCPU_cycle;

/* Last simulation cycle reached by the cache simulator. */
static u_longlong_t cache_cycle = 0;

#define NUM_ISTATES 0x100
static ulong_t istate_start = 0;        /* start index for istate array */
static IState_t istate[NUM_ISTATES];    /* interface state array */

/* Compute the delay from the current cycle to a given alarm cycle. */
static inline short alarm_to_delay(alarm_t alarm)
{
    return alarm - xCPU_cycle;
}

static inline alarm_t mshr_alarm(MSHR_p mshr)
{
    MSHR_p mshr2 = mshr->mshr2;
    if(mshr2) return mshr2->alarm;
    else      return mshr->alarm;
}

static inline int _buf_inum(int inum, ulong_t istart)
{
    int ret = (inum + istart) % NUM_ISTATES - istate_start;
    if(ret > NUM_ISTATES/2) return ret - NUM_ISTATES;
    if(ret < -(NUM_ISTATES/2)) return ret + NUM_ISTATES;
    return ret;
}
#define BUF_INUM(buf) (_buf_inum((buf)->inum,(buf)->istart))
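/*
 * How buffers are tied to instructions: each in-flight instruction's MSHR or
 * write buffer is recorded in the istate ring, indexed by
 * (inum + istate_start) % NUM_ISTATES, and xcache_retire() advances
 * istate_start as instructions commit.  BUF_INUM() recovers a buffer's
 * instruction number relative to the *current* window start, wrapped into
 * (-NUM_ISTATES/2, NUM_ISTATES/2].  Worked example (values illustrative):
 * a load tagged inum 4 while istate_start is 0xF8 is recorded at index 0xFC;
 * after 10 instructions retire, istate_start is 0x02 and BUF_INUM() computes
 * 252 - 2 = 250, which wraps to -6, i.e. the load now sits 6 slots before
 * the current window start.
 */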
static MSHR_p cache_load_start(ulong_t,int,short*,int);
static int cache_load_continue(MSHR_p,short*);
static WBuf_p cache_store_start(ulong_t,int,short*,int);
static int cache_commit(WBuf_p,short*);
static void cache_rollback(WBuf_p);

static void load_l1(MSHR_p);    /* waits for conflicting stores */
static void load_l2(MSHR_p);    /* process a L2 cache lookup */

static int lookup_l1(ulong_t);
static void update_l1(ulong_t);
static void flush_line_l1(ulong_t);
static int lookup_l2(ulong_t);
static void update_l2(ulong_t);
static void flush_line_l2(ulong_t);

/* Cache Statistics */
hrtime_t time_cache = 0;
static hrtime_t time_cache_start;
u_longlong_t count_cache_flush = 0;
u_longlong_t count_cache_load_start = 0;
u_longlong_t count_cache_no_mshr = 0;
u_longlong_t count_cache_load_continue = 0;
u_longlong_t count_cache_load_hit_l1 = 0;
u_longlong_t count_cache_load_hit_l2 = 0;
u_longlong_t count_cache_load_memory = 0;
u_longlong_t count_cache_load_removed = 0;
u_longlong_t count_cache_store_start = 0;
u_longlong_t count_cache_store_spec = 0;
u_longlong_t count_cache_no_wbuf = 0;
u_longlong_t count_cache_no_wbuf_spec = 0;
u_longlong_t count_cache_store_commit = 0;
u_longlong_t count_cache_store_commit_fail = 0;
u_longlong_t count_cache_store_rollback = 0;
u_longlong_t count_cache_segfault = 0;

/******************************************************************************
 * Cache simulator interface functions:  These functions hide the cache
 * simulator's internal data structures (i.e., MSHR_t and WBuf_t) from
 * Facile.
 */
void xcache_init()
{
    ulong_t ii, jj;

    for(ii=0; ii<MAX_NUM_EVENTS-1; ++ii)
        _events[ii].next_free = &_events[ii+1];
    _events[MAX_NUM_EVENTS-1].next_free = NULL;
    free_events = _events;
    eventQ.next_alloc = &eventQ;
    eventQ.prev_alloc = &eventQ;
    eventQ.alarm = 0;
    delayedQ.next_alloc = &delayedQ;
    delayedQ.prev_alloc = &delayedQ;
    delayedQ.alarm = 0;

    for(ii=0; ii<NUM_MSHRs; ++ii) {
        mshr_list[ii].next_free = &mshr_list[ii+1];
        mshr_list[ii].is_free = 1;
    }
    mshr_list[NUM_MSHRs-1].next_free = NULL;
    mshr_free_list = mshr_list;
    mshr_num_free = NUM_MSHRs;

    for(ii=0; ii<NUM_MSHR2s; ++ii) {
        mshr2_list[ii].next_free = &mshr2_list[ii+1];
        mshr2_list[ii].is_free = 1;
    }
    mshr2_list[NUM_MSHR2s-1].next_free = NULL;
    mshr2_free_list = mshr2_list;
    mshr2_num_free = NUM_MSHR2s;

    for(ii=0; ii<NUM_WBUFs; ++ii) {
        wbuf_list[ii].next_free = &wbuf_list[ii+1];
        wbuf_list[ii].is_free = 1;
    }
    wbuf_list[NUM_WBUFs-1].next_free = NULL;
    wbuf_free_list = wbuf_list;
    wbuf_num_free = NUM_WBUFs;

    for(ii=0; ii<L1_BANK_LINES; ++ii) {
        for(jj=0; jj<L1_CACHE_ASSOC; ++jj)
            l1_cache[ii][jj].is_free = 1;
        l1_clock[ii] = 0;
    }
    for(ii=0; ii<L2_BANK_LINES; ++ii) {
        for(jj=0; jj<L2_CACHE_ASSOC; ++jj)
            l2_cache[ii][jj].is_free = 1;
        l2_clock[ii] = 0;
    }

    istate_start = 0;
}

void xcache_flush()
{
#ifdef STATS
    ++count_cache_flush;
#endif
}

int xcache_load_start(ulong_t inum, ulong_t va, int first, short *delay)
{
    ulong_t index;
    MSHR_p mshr;
    int ret;

#ifdef STATS
    ++count_cache_load_start;
    time_cache_start = gethrtime();
#endif

    index = (inum + istate_start) % NUM_ISTATES;
    mshr = cache_load_start(va,first,delay,(int)inum);
    istate[index].mshr = mshr;
    ret = (mshr || *delay<0) ? 0 : 1;

#ifdef STATS
    time_cache += gethrtime() - time_cache_start;
    if(*delay < 0) ++count_cache_no_mshr;
#endif
    return ret;
}

int xcache_load_continue(ulong_t inum, short *delay)
{
    ulong_t index;
    int ret;

#ifdef STATS
    ++count_cache_load_continue;
    time_cache_start = gethrtime();
#endif

    index = (inum + istate_start) % NUM_ISTATES;
    ret = cache_load_continue(istate[index].mshr,delay);

#ifdef STATS
    time_cache += gethrtime() - time_cache_start;
#endif
    return ret;
}
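/*
 * Illustrative sketch (not part of the simulator, hence compiled out): one
 * way a front end such as the Facile-generated pipeline could drive the
 * load interface above.  The control flow follows the return conventions of
 * xcache_load_start()/xcache_load_continue(); the function name and the way
 * the caller waits out a delay are assumptions, not the real caller.
 */
#if 0
static void example_issue_load(ulong_t inum, ulong_t va)
{
    short delay;

    /* 'first' non-zero lets this request claim the last free MSHR; later
     * requests would pass 0 to keep one in reserve (see cache_load_start). */
    if(xcache_load_start(inum, va, 1, &delay)) {
        /* L1 hit: data is ready after CACHE_HIT_DELAY cycles. */
        return;
    }
    if(delay < 0) {
        /* No MSHR free, or the access faulted: the pipeline would stall
         * this load and retry after -delay cycles. */
        return;
    }
    /* Miss in flight: wait 'delay' cycles (pipeline-specific), then poll. */
    while(!xcache_load_continue(inum, &delay)) {
        /* ... wait another 'delay' cycles before polling again ... */
    }
    /* When the instruction later commits, the pipeline calls
     * xcache_retire(N) so the interface-state slots can be reused. */
}
#endif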
int xcache_store_start(ulong_t inum, ulong_t va, int is_spec, short *delay)
{
    ulong_t index;
    WBuf_p wbuf;
    int ret;

#ifdef STATS
    ++count_cache_store_start;
    if(is_spec) ++count_cache_store_spec;
    time_cache_start = gethrtime();
#endif

    index = (inum + istate_start) % NUM_ISTATES;
    wbuf = cache_store_start(va,is_spec,delay,(int)inum);
    istate[index].wbuf = wbuf;
    ret = wbuf ? 1 : 0;

#ifdef STATS
    time_cache += gethrtime() - time_cache_start;
    if(!wbuf) {
        ++count_cache_no_wbuf;
        if(is_spec) ++count_cache_no_wbuf_spec;
    }
    else if(*delay > 0) ++count_cache_store_commit_fail;
#endif
    return ret;
}

int xcache_store_commit(ulong_t inum, short *delay)
{
    ulong_t index;
    int ret;

#ifdef STATS
    ++count_cache_store_commit;
    time_cache_start = gethrtime();
#endif

    index = (inum + istate_start) % NUM_ISTATES;
    ret = cache_commit(istate[index].wbuf,delay);

#ifdef STATS
    time_cache += gethrtime() - time_cache_start;
    if(!ret) ++count_cache_store_commit_fail;
#endif
    return ret;
}

void xcache_store_rollback(ulong_t inum)
{
    ulong_t index;

#ifdef STATS
    ++count_cache_store_rollback;
    time_cache_start = gethrtime();
#endif

    index = (inum + istate_start) % NUM_ISTATES;
    cache_rollback(istate[index].wbuf);

#ifdef STATS
    time_cache += gethrtime() - time_cache_start;
#endif
}

void xcache_retire(ulong_t N)
{
    istate_start = (istate_start + N) % NUM_ISTATES;
}

/******************************************************************************
 ******************************************************************************
 ***  The rest of these routines perform the actual cache simulation!      ***
 ******************************************************************************
 *****************************************************************************/

/* Advance cache simulation to current execution time. */
static void cache_advance()
{
    assert(xCPU_cycle >= cache_cycle);

    for(;;) {
        CacheEvent_p event = event_top();
        if(!event || event->alarm > xCPU_cycle) break;
        cache_cycle = event->alarm;
        finish_free();

        switch(event->kind) {
        case EVENT_LOAD_L1:
            load_l1((MSHR_p)event->data);
            break;
        case EVENT_LOAD_L2:
            load_l2((MSHR_p)event->data);
            break;
        case EVENT_LOAD_MEM: {
            /* just waiting to finish the load */
            MSHR_p mshr = (MSHR_p)event->data;
            update_l1(mshr->address);
            assert(mshr->mshr2);
            update_l2(mshr->address);
#ifdef STATS
            count_cache_load_memory += mshr->num_coalesced;
#endif
            mshr_free(mshr);
            break;
        }
        case EVENT_STORE: {
            /* just waiting to finish the store */
            WBuf_p wbuf = (WBuf_p)event->data;
            add_delayed_events();
            wbuf_free(wbuf);
            break;
        }
        default:
            assert(!"Unknown cache event!");
            break;
        }
        event_free(event);
    }
    add_delayed_events();
    cache_cycle = xCPU_cycle;
    finish_free();
}

/******************************************************************************
 * This is a never before seen load request.
 * Find the given address in the L1 cache, else allocate an L1 MSHR
 * to manage the miss through the rest of the memory hierarchy.
 */
MSHR_p cache_load_start(ulong_t va, int first, short *delay, int inum)
{
    CacheEvent_p event;
    alarm_t alarm = 0;
    MSHR_p mshr = NULL;
    WBuf_p wbuf = NULL;
    int mshr_inum;
    int wbuf_inum;
    int ii;

    if(inum > 0) {
        if(va == 0) xmem_test_failed = 1;
        else {
            xmem_test_flag = 1;
            xmem_test_failed = 0;
            xmem_test_address = (void*)va;
            xmem_load8_test((void*)va);
            xmem_test_flag = 0;
        }
        if(xmem_test_failed) {
            ++count_cache_segfault;
            *delay = -128;
            return NULL;
        }
    }

    va &= ~(L1_LINE_WIDTH-1);

    /**************************************************************************
     * Update cache state, advancing the simulation up
     * to the current execution time (ie, CPU_cycle).
     */
    cache_advance();

    /* Conservatively avoid conflicts with data in the write-buffer.
     * Don't allow any conflicting loads until prior writes complete.
     */
    alarm = xCPU_cycle + 1;
    wbuf_inum = -NUM_ISTATES;
    for(ii=0; ii<NUM_WBUFs; ++ii) {
        WBuf_p _wbuf = &wbuf_list[ii];
        int _wbuf_inum = BUF_INUM(_wbuf);
        if(_wbuf->is_free) continue;
        if((_wbuf->address & ~(L1_LINE_WIDTH-1)) == va && _wbuf_inum <= inum) {
            if(_wbuf->alarm > alarm) alarm = _wbuf->alarm;
            if(_wbuf_inum > wbuf_inum) {
                wbuf_inum = _wbuf_inum;
                wbuf = _wbuf;
            }
        }
    }

    /* Lookup address in the L1 cache. */
    if(!wbuf && lookup_l1(va)) {
#ifdef STATS
        ++count_cache_load_hit_l1;
#endif
        *delay = CACHE_HIT_DELAY;
        return NULL;
    }

    /* L1 cache miss:  First try coalescing this load
     * with a prior load request.
     */
    for(ii=0; ii<NUM_MSHRs; ++ii) {
        mshr = &mshr_list[ii];
        mshr_inum = BUF_INUM(mshr);
        if(mshr->is_free || (wbuf && mshr_inum < wbuf_inum)) continue;
        if(mshr->address == va && mshr_inum <= inum) {
            *delay = alarm_to_delay(mshr_alarm(mshr));
#ifdef STATS
            ++mshr->num_coalesced;
#endif
            return mshr;
        }
    }

    /* Test if we can allocate a new L1 MSHR. */
    if(mshr_num_free < 1 || (!first && mshr_num_free <= 1)) {
        /* Cannot allocate an MSHR for the L1 cache;
         * Return the minimum cycle delay until one becomes free.
         */
        alarm = ~(alarm_t)0;
        for(ii=0; ii<NUM_MSHRs; ++ii) {
            mshr = &mshr_list[ii];
            if(mshr->is_free) continue;
            if(mshr_alarm(mshr) < alarm) alarm = mshr_alarm(mshr);
        }
        /* delay returned negative => operation aborted */
        *delay = -alarm_to_delay(alarm);
        return NULL;
    }

    if(!wbuf) alarm = xCPU_cycle + L1_MISS_DELAY;

    /* Allocate new L1 MSHR. */
    mshr = mshr_free_list;
    mshr_free_list = mshr->next_free;
    mshr->is_free = 0;
    mshr->ref_count = 1;
    --mshr_num_free;
    mshr->address = va & ~(L1_LINE_WIDTH-1);
    mshr->alarm = alarm;
    mshr->mshr2 = NULL;
    mshr->istart = istate_start;
    mshr->inum = inum;
    mshr->wait = wbuf ? 1 : 0;
#ifdef STATS
    mshr->num_coalesced = 1;
#endif

    /* Add a cache event to continue processing this load request. */
    event = event_alloc(alarm);
    event->kind = wbuf ? EVENT_LOAD_L1 : EVENT_LOAD_L2;
    event->data = mshr;

    /* Set the delay for an L1 cache miss,
     * and return the corresponding MSHR.
     */
    *delay = alarm_to_delay(alarm);
    return mshr;
}

/******************************************************************************
 * Delay has expired on an existing load; Advance cache simulation.
 */
int cache_load_continue(MSHR_p mshr, short *delay)
{
    alarm_t alarm;

    /**************************************************************************
     * Update cache state, advancing the simulation up
     * to the current execution time (ie, CPU_cycle).
     */
    cache_advance();

    alarm = mshr_alarm(mshr);
    assert(alarm >= xCPU_cycle);
    if(alarm == xCPU_cycle) {
        /* if load is done */
        *delay = CACHE_HIT_DELAY;
        return 1;
    }

    /* Load still in progress */
    *delay = alarm_to_delay(alarm);
    return 0;
}
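/*
 * A missing load progresses through the event kinds as follows (summarised
 * from cache_advance() and the two routines below): an EVENT_LOAD_L1 event
 * is scheduled while the load is blocked behind conflicting stores; once
 * those drain, an L1 miss schedules EVENT_LOAD_L2 after L1_MISS_DELAY; an
 * L2 miss allocates (or coalesces into) an L2 MSHR and schedules
 * EVENT_LOAD_MEM after L2_MISS_DELAY; when that event fires, cache_advance()
 * fills the line into both caches and frees the MSHR.
 */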
void load_l1(MSHR_p mshr)
{
    CacheEvent_p event;
    WBuf_p wbuf = NULL;
    alarm_t alarm = 0;
    int wbuf_inum;
    int mshr_inum;
    int retry = 0;
    int ii;

    /* Verify that all conflicting stores have completed. */
    alarm = xCPU_cycle;
    wbuf_inum = -NUM_ISTATES;
    mshr_inum = BUF_INUM(mshr);
    for(ii=0; ii<NUM_WBUFs; ++ii) {
        WBuf_p _wbuf = &wbuf_list[ii];
        int _wbuf_inum = BUF_INUM(_wbuf);
        if(_wbuf->is_free) continue;
        if((_wbuf->address & ~(L1_LINE_WIDTH-1)) == mshr->address
           && _wbuf_inum <= mshr_inum) {
            if(_wbuf->alarm > alarm) alarm = _wbuf->alarm;
            if(_wbuf_inum > wbuf_inum) {
                wbuf_inum = _wbuf_inum;
                wbuf = _wbuf;
            }
        }
    }

    if(wbuf) {
        /* Still waiting for conflicting stores */
        assert(!mshr->mshr2);
        if(!wbuf->alarm) ++alarm;
        retry = (alarm == xCPU_cycle);
    }
    else {
        /* No more conflicting stores => start processing load. */
        if(lookup_l1(mshr->address)) {
            /* Hit in L1 cache */
#ifdef STATS
            count_cache_load_hit_l1 += mshr->num_coalesced;
#endif
            mshr_free(mshr);
            return;
        }
        /* Miss in L1 cache */
        alarm = xCPU_cycle + L1_MISS_DELAY;
    }

    /* Add a cache event to continue processing this load request. */
    mshr->alarm = alarm;
    mshr->wait = wbuf ? 1 : 0;
    if(!retry) event = event_alloc(alarm);
    else       event = delayed_event_alloc(alarm);
    event->kind = wbuf ? EVENT_LOAD_L1 : EVENT_LOAD_L2;
    event->data = mshr;
}

void load_l2(MSHR_p mshr)
{
    CacheEvent_p event;
    int mshr_inum;
    MSHR_p mshr2;
    int ii;

    /* Lookup load address in the L2 cache.
     * If found, then we are done with this load.
     */
    if(lookup_l2(mshr->address)) {
#ifdef STATS
        count_cache_load_hit_l2 += mshr->num_coalesced;
#endif
        update_l1(mshr->address);
        mshr_free(mshr);
        return;
    }

    /**************************************************************************
     * Load missed in the L2 cache.
     */

    /* Try to assign this load to an L2 MSHR.  First test if it can be
     * coalesced into an existing L2 MSHR for an earlier instruction.
     */
    mshr_inum = BUF_INUM(mshr);
    for(ii=0; ii<NUM_MSHR2s; ++ii) {
        mshr2 = &mshr2_list[ii];
        if(mshr2->is_free) continue;
        if(mshr2->address == (mshr->address & ~(L2_LINE_WIDTH-1))
           && BUF_INUM(mshr2) <= mshr_inum) {
            mshr->mshr2 = mshr2;
            ++mshr2->ref_count;
            break;
        }
    }

    if(!mshr->mshr2) {
        /* Allocate a new L2 MSHR for this load.
         * (There should always be a free L2 MSHR.)
         */
        mshr2 = mshr2_free_list;
        mshr2_free_list = mshr2->next_free;
        mshr2->is_free = 0;
        mshr2->ref_count = 1;
        --mshr2_num_free;
        mshr2->address = mshr->address & ~(L2_LINE_WIDTH-1);
        mshr2->alarm = cache_cycle + L2_MISS_DELAY;
        mshr2->istart = mshr->istart;
        mshr2->inum = mshr->inum;
        mshr->mshr2 = mshr2;
    }

    /* Add a cache event to continue processing this load request. */
    event = event_alloc(mshr_alarm(mshr));
    event->kind = EVENT_LOAD_MEM;
    event->data = mshr;
}
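/*
 * Ordering between loads and stores to the same line (as implemented by the
 * routines above and below): cache_load_start()/load_l1() hold a load back
 * until all older conflicting write buffers have drained; cache_commit()
 * holds a store back until all older conflicting loads have completed; and
 * cache_store_start() cancels any younger in-flight loads to the same line,
 * since those must have been issued speculatively and later rolled back.
 */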
WBuf_p cache_store_start(ulong_t va, int is_spec, short *delay, int inum)
{
    WBuf_p wbuf;
    int ii;

    va &= ~(WRITE_LINE_WIDTH-1);

    /**************************************************************************
     * Update cache state, advancing the simulation up
     * to the current execution time (ie, CPU_cycle).
     */
    cache_advance();

    /* Try to allocate a new write-buffer. */
    if(wbuf_num_free <= 0) {
        /* No free write-buffers; Delay 'til first free buffer. */
        alarm_t alarm = ~(alarm_t)0;
        for(ii=0; ii<NUM_WBUFs; ++ii) {
            wbuf = &wbuf_list[ii];
            if(wbuf->is_free) continue;
            if(wbuf->alarm > 0 && wbuf->alarm < alarm) alarm = wbuf->alarm;
        }
        if(alarm == ~(alarm_t)0) *delay = -1;
        else {
            assert(alarm > xCPU_cycle);
            *delay = -alarm_to_delay(alarm);
        }
        return NULL;
    }

    /* Allocate a new write-buffer. */
    wbuf = wbuf_free_list;
    wbuf_free_list = wbuf->next_free;
    wbuf->is_free = 0;
    --wbuf_num_free;
    wbuf->address = va & ~(WRITE_LINE_WIDTH-1);
    wbuf->istart = istate_start;
    wbuf->inum = inum;
    wbuf->alarm = 0;

    if(is_spec) *delay = 0;
    else {
        ii = cache_commit(wbuf,delay);
        assert(ii || *delay > 0);
    }

    /* This store may conflict with executing loads that appear to
     * occur later in the instruction stream.  This can happen if a
     * load was issued speculatively, then later rolled back.  If a
     * conflict exists then cancel the load.
     */
    for(ii=0; ii<NUM_MSHRs; ++ii) {
        MSHR_p _mshr = &mshr_list[ii];
        if(_mshr->is_free) continue;
        if(_mshr->address == (va & ~(L1_LINE_WIDTH-1))
           && BUF_INUM(_mshr) >= inum && !_mshr->wait) {
            /* Remove pending events for this MSHR and free the MSHR.
             * (event_free() leaves next_alloc intact, so the walk can
             * safely continue past a freed event.) */
            CacheEvent_p event;
            for(event=eventQ.next_alloc; event!=&eventQ; event=event->next_alloc) {
                switch(event->kind) {
                case EVENT_LOAD_L1:
                case EVENT_LOAD_L2:
                case EVENT_LOAD_MEM:
                    if((MSHR_p)event->data == _mshr) event_free(event);
                    break;
                default:
                    break;
                }
            }
#ifdef STATS
            count_cache_load_removed += _mshr->num_coalesced;
#endif
            mshr_free(_mshr);
        }
    }

    return wbuf;
}

int cache_commit(WBuf_p wbuf, short *delay)
{
    CacheEvent_p event;
    MSHR_p mshr = NULL;
    alarm_t alarm = 0;
    int wbuf_inum;
    int ii;

    /**************************************************************************
     * Update cache state, advancing the simulation up
     * to the current execution time (ie, CPU_cycle).
     */
    cache_advance();

    /* Make sure this store occurs after conflicting loads complete. */
    wbuf_inum = BUF_INUM(wbuf);
    for(ii=0; ii<NUM_MSHRs; ++ii) {
        MSHR_p _mshr = &mshr_list[ii];
        if(_mshr->is_free) continue;
        if(_mshr->address == (wbuf->address & ~(L1_LINE_WIDTH-1))
           && BUF_INUM(_mshr) < wbuf_inum && mshr_alarm(_mshr) > alarm) {
            mshr = _mshr;
            alarm = mshr_alarm(_mshr);
        }
    }
    if(mshr) {
        /* Conflicting load found */
        wbuf->alarm = alarm;
        *delay = alarm_to_delay(alarm);
        return 0;
    }

    /* Continue to simulate timing of store out to memory. */
    alarm = xCPU_cycle + WRITE_DELAY;
    wbuf->alarm = alarm;

    /* Add a cache event to continue processing this store. */
    event = event_alloc(alarm);
    event->kind = EVENT_STORE;
    event->data = wbuf;

    /* No conflicts => allow commit to succeed. */
    *delay = 0;
    return 1;
}
void cache_rollback(WBuf_p wbuf)
{
    /**************************************************************************
     * Update cache state, advancing the simulation up
     * to the current execution time (ie, CPU_cycle).
     */
    cache_advance();

    /* free resources */
    wbuf_free(wbuf);
}

CacheEvent_p event_top()
{
    CacheEvent_p event = eventQ.next_alloc;
    if(event == &eventQ) return 0;
    return event;
}

CacheEvent_p event_alloc(alarm_t alarm)
{
    CacheEvent_p event, ee;

    assert(free_events);
    event = free_events;
    free_events = event->next_free;
    event->alarm = alarm;

    for(ee=eventQ.prev_alloc; ee!=&eventQ; ee=ee->prev_alloc)
        if(alarm >= ee->alarm) break;
    event->next_alloc = ee->next_alloc;
    event->prev_alloc = ee;
    event->next_alloc->prev_alloc = ee->next_alloc = event;

    return event;
}

void event_free(CacheEvent_p event)
{
    event->next_alloc->prev_alloc = event->prev_alloc;
    event->prev_alloc->next_alloc = event->next_alloc;
    event->next_free = free_events;
    free_events = event;
}

CacheEvent_p delayed_event_alloc(alarm_t alarm)
{
    CacheEvent_p event, ee;

    assert(free_events);
    event = free_events;
    free_events = event->next_free;
    event->alarm = alarm;

    for(ee=delayedQ.prev_alloc; ee!=&delayedQ; ee=ee->prev_alloc)
        if(alarm >= ee->alarm) break;
    event->next_alloc = ee->next_alloc;
    event->prev_alloc = ee;
    event->next_alloc->prev_alloc = ee->next_alloc = event;

    return event;
}

void add_delayed_events()
{
    CacheEvent_p event, next, ee;

    for(event=delayedQ.next_alloc; event!=&delayedQ; event=next) {
        next = event->next_alloc;
        for(ee=eventQ.prev_alloc; ee!=&eventQ; ee=ee->prev_alloc)
            if(event->alarm >= ee->alarm) break;
        event->next_alloc = ee->next_alloc;
        event->prev_alloc = ee;
        event->next_alloc->prev_alloc = ee->next_alloc = event;
    }
    delayedQ.next_alloc = delayedQ.prev_alloc = &delayedQ;
}

static ulong_t wbuf_num_to_free = 0;
static ulong_t mshr_num_to_free = 0;
static ulong_t mshr2_num_to_free = 0;
static WBuf_p wbuf_to_free_list = NULL;
static MSHR_p mshr_to_free_list = NULL;
static MSHR_p mshr2_to_free_list = NULL;
static WBuf_p *wbuf_to_free_list_end = &wbuf_to_free_list;
static MSHR_p *mshr_to_free_list_end = &mshr_to_free_list;
static MSHR_p *mshr2_to_free_list_end = &mshr2_to_free_list;

void mshr_free(MSHR_p mshr)
{
    assert(!mshr->is_free && mshr->ref_count > 0);
    if(--mshr->ref_count > 0) return;
    if(mshr->mshr2) mshr2_free(mshr->mshr2);
    ++mshr_num_to_free;
    *mshr_to_free_list_end = mshr;
    mshr_to_free_list_end = &mshr->next_free;
    mshr->next_free = NULL;
    mshr->is_free = 1;
}

void mshr2_free(MSHR_p mshr)
{
    assert(!mshr->is_free && mshr->ref_count > 0);
    if(--mshr->ref_count > 0) return;
    ++mshr2_num_to_free;
    *mshr2_to_free_list_end = mshr;
    mshr2_to_free_list_end = &mshr->next_free;
    mshr->next_free = NULL;
    mshr->is_free = 1;
}

void wbuf_free(WBuf_p wbuf)
{
    assert(!wbuf->is_free);
    ++wbuf_num_to_free;
    *wbuf_to_free_list_end = wbuf;
    wbuf_to_free_list_end = &wbuf->next_free;
    wbuf->next_free = NULL;
    wbuf->is_free = 1;
}

void finish_free()
{
    static u_longlong_t last_free_cycle = 0;

    if(cache_cycle <= last_free_cycle) return;
    last_free_cycle = cache_cycle;

    *mshr_to_free_list_end = mshr_free_list;
    mshr_to_free_list_end = &mshr_to_free_list;
    mshr_free_list = mshr_to_free_list;
    mshr_num_free += mshr_num_to_free;
    mshr_num_to_free = 0;

    *mshr2_to_free_list_end = mshr2_free_list;
    mshr2_to_free_list_end = &mshr2_to_free_list;
    mshr2_free_list = mshr2_to_free_list;
    mshr2_num_free += mshr2_num_to_free;
    mshr2_num_to_free = 0;

    *wbuf_to_free_list_end = wbuf_free_list;
    wbuf_to_free_list_end = &wbuf_to_free_list;
    wbuf_free_list = wbuf_to_free_list;
    wbuf_num_free += wbuf_num_to_free;
    wbuf_num_to_free = 0;
}

/******************************************************************************
 * The actual cache data structures and access functions.
 */
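/*
 * Replacement in both tag arrays is a clock (second-chance) approximation of
 * LRU: lookups set a line's 'touch' bit, and on a fill the clock hand sweeps
 * the set, clearing touch bits, until it finds a way that is either free or
 * untouched, which is then overwritten.
 */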
int lookup_l1(ulong_t va)
{
    ulong_t line = (va % L1_BANK_SIZE) / L1_LINE_WIDTH;
    int ii;

    va &= ~(L1_LINE_WIDTH-1);
    for(ii=0; ii<L1_CACHE_ASSOC; ++ii) {
        if(!l1_cache[line][ii].is_free && l1_cache[line][ii].address == va) {
            l1_cache[line][ii].touch = 1;
            lookup_l2(va);
            return 1;
        }
    }
    return 0;
}

void update_l1(ulong_t va)
{
    ulong_t line = (va % L1_BANK_SIZE) / L1_LINE_WIDTH;
    int ii;

    va &= ~(L1_LINE_WIDTH-1);
    for(ii=0; ii<L1_CACHE_ASSOC; ++ii) {
        if(!l1_cache[line][ii].is_free && l1_cache[line][ii].address == va) {
            return;
        }
    }
    for(ii = l1_clock[line];
        !l1_cache[line][ii].is_free && l1_cache[line][ii].touch;
        ii = (ii+1) % L1_CACHE_ASSOC) {
        l1_cache[line][ii].touch = 0;
    }
    l1_cache[line][ii].address = va;
    l1_cache[line][ii].is_free = 0;
    l1_cache[line][ii].touch = 0;
    l1_clock[line] = (ii+1) % L1_CACHE_ASSOC;
}

void flush_line_l1(ulong_t va)
{
    ulong_t line = (va % L1_BANK_SIZE) / L1_LINE_WIDTH;
    int ii;

    va &= ~(L1_LINE_WIDTH-1);
    for(ii=0; ii<L1_CACHE_ASSOC; ++ii) {
        if(!l1_cache[line][ii].is_free && l1_cache[line][ii].address == va) {
            l1_cache[line][ii].is_free = 1;
            return;
        }
    }
}

int lookup_l2(ulong_t va)
{
    ulong_t line = (va % L2_BANK_SIZE) / L2_LINE_WIDTH;
    int ii;

    va &= ~(L2_LINE_WIDTH-1);
    for(ii=0; ii<L2_CACHE_ASSOC; ++ii) {
        if(!l2_cache[line][ii].is_free && l2_cache[line][ii].address == va) {
            l2_cache[line][ii].touch = 1;
            return 1;
        }
    }
    return 0;
}

void update_l2(ulong_t va)
{
    ulong_t line = (va % L2_BANK_SIZE) / L2_LINE_WIDTH;
    int ii;

    va &= ~(L2_LINE_WIDTH-1);
    for(ii=0; ii<L2_CACHE_ASSOC; ++ii) {
        if(!l2_cache[line][ii].is_free && l2_cache[line][ii].address == va) {
            return;
        }
    }
    for(ii = l2_clock[line];
        !l2_cache[line][ii].is_free && l2_cache[line][ii].touch;
        ii = (ii+1) % L2_CACHE_ASSOC) {
        l2_cache[line][ii].touch = 0;
    }
    l2_cache[line][ii].address = va;
    l2_cache[line][ii].is_free = 0;
    l2_cache[line][ii].touch = 0;
    l2_clock[line] = (ii+1) % L2_CACHE_ASSOC;
}

void flush_line_l2(ulong_t va)
{
    ulong_t line = (va % L2_BANK_SIZE) / L2_LINE_WIDTH;
    int ii;

    va &= ~(L2_LINE_WIDTH-1);
    for(ii=0; ii<L2_CACHE_ASSOC; ++ii) {
        if(!l2_cache[line][ii].is_free && l2_cache[line][ii].address == va) {
            l2_cache[line][ii].is_free = 1;
            return;
        }
    }
}