# HG changeset patch # User Lena Olson # Date 1449098773 21600 # Node ID 20dc45bc1490d609185c620cc5e8db2bce89b074 # Parent 3a87241adfb8c993f4ba2671ae6cc7082743ee71 Configs: Update checkpoint logic to fix taking and restoring checkpoints at the same time diff -r 3a87241adfb8 -r 20dc45bc1490 configs/common/Simulation.py --- a/configs/common/Simulation.py Sat Oct 11 16:18:51 2014 -0500 +++ b/configs/common/Simulation.py Wed Dec 02 17:26:13 2015 -0600 @@ -485,7 +485,8 @@ # option only for finding the checkpoints to restore from. This # lets us test checkpointing by restoring from one set of # checkpoints, generating a second set, and then comparing them. - if options.take_checkpoints and options.checkpoint_restore: + if (options.take_checkpoints or options.checkpoint_at_end) and \ + options.checkpoint_restore: if m5.options.outdir: cptdir = m5.options.outdir else: # HG changeset patch # User Lena Olson # Date 1449098817 21600 # Node ID 157e8df00aa5075936b75f412529b00aed4ee7dd # Parent 20dc45bc1490d609185c620cc5e8db2bce89b074 Mem: Require flush requests to have a response * * * Ruby: Fix flush response request leak The flush-response patch makes the FlushReq MemCmd require a response, which causes requests to not be deleted in the packet destructor during cooldown of the caches before checkpointing. Requests are 72B, so larger systems with reasonably large caches leak a lot of memory for each checkpoint taken. NOTE: This change will be folded into the jason/flush-responses patch. diff -r 20dc45bc1490 -r 157e8df00aa5 src/mem/packet.cc --- a/src/mem/packet.cc Wed Dec 02 17:26:13 2015 -0600 +++ b/src/mem/packet.cc Wed Dec 02 17:26:57 2015 -0600 @@ -166,7 +166,11 @@ /* PrintReq */ { SET2(IsRequest, IsPrint), InvalidCmd, "PrintReq" }, /* Flush Request */ - { SET3(IsRequest, IsFlush, NeedsExclusive), InvalidCmd, "FlushReq" }, + { SET4(IsRequest, IsFlush, NeedsExclusive, NeedsResponse), FlushResp, + "FlushReq" }, + /* Flush Response */ + { SET3(IsResponse, IsFlush, NeedsExclusive), InvalidCmd, + "FlushResp" }, /* Invalidation Request */ { SET3(NeedsExclusive, IsInvalidate, IsRequest), InvalidCmd, "InvalidationReq" }, diff -r 20dc45bc1490 -r 157e8df00aa5 src/mem/packet.hh --- a/src/mem/packet.hh Wed Dec 02 17:26:13 2015 -0600 +++ b/src/mem/packet.hh Wed Dec 02 17:26:57 2015 -0600 @@ -119,6 +119,7 @@ // Fake simulator-only commands PrintReq, // Print state matching address FlushReq, //request for a cache flush + FlushResp, InvalidationReq, // request for address to be invalidated from lsq NUM_MEM_CMDS }; diff -r 20dc45bc1490 -r 157e8df00aa5 src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:26:13 2015 -0600 +++ b/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:26:57 2015 -0600 @@ -568,6 +568,8 @@ delete pkt; g_system_ptr->m_cache_recorder->enqueueNextFetchRequest(); } else if (g_system_ptr->m_cooldown_enabled) { + assert(pkt->req); + delete pkt->req; delete pkt; g_system_ptr->m_cache_recorder->enqueueNextFlushRequest(); } else { # HG changeset patch # User Joel Hestness # Date 1449098817 21600 # Node ID a9b64c93ebd89aeea0a70ad7e02dd32827015cb4 # Parent 157e8df00aa5075936b75f412529b00aed4ee7dd Add the x86 magic instruction to do a callback into gem5 when a gpu call is made by the application that is being run * * * Fixed M5 magic instruction m5_gpu to be serializing * * * ARM ISA: Add m5_gpu magic instruction Submitter: Jieming Yin In order to add gem5-gpu ARM support, the ARM ISA needs to include the m5_gpu magic instruction. 
This patch adds that instruction including passing memory addresses as 64-bit rather than 32-bit as the underlying architecture. * * * m5ops: Add ARM32 m5_gpu interface function Submitter: Jieming Yin Add the m5 utils function to intercept m5_gpu calls under ARM32. This is a required step before building ARM32 CPU functionality for gem5-gpu. NOTE: other patches are required for the ARM decoder to know how to handle m5_gpu as a pseudo-instruction. diff -r 157e8df00aa5 -r a9b64c93ebd8 src/arch/arm/isa/formats/m5ops.isa --- a/src/arch/arm/isa/formats/m5ops.isa Wed Dec 02 17:26:57 2015 -0600 +++ b/src/arch/arm/isa/formats/m5ops.isa Wed Dec 02 17:26:57 2015 -0600 @@ -68,6 +68,7 @@ case 0x54: return new M5panic(machInst); case 0x5a: return new M5workbegin(machInst); case 0x5b: return new M5workend(machInst); + case 0x5c: return new M5gpu(machInst); } } ''' diff -r 157e8df00aa5 -r a9b64c93ebd8 src/arch/arm/isa/insts/m5ops.isa --- a/src/arch/arm/isa/insts/m5ops.isa Wed Dec 02 17:26:57 2015 -0600 +++ b/src/arch/arm/isa/insts/m5ops.isa Wed Dec 02 17:26:57 2015 -0600 @@ -563,4 +563,18 @@ header_output += BasicDeclare.subst(m5workendIop) decoder_output += BasicConstructor.subst(m5workendIop) exec_output += PredOpExecute.subst(m5workendIop) + + m5gpuCode = '''PseudoInst::gpu( + xc->tcBase(), + join32to64(R1, R0), + join32to64(R3, R2) + );''' + m5gpuIop = InstObjParams("m5gpu", "M5gpu", "PredOp", + { "code": m5gpuCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(m5gpuIop) + decoder_output += BasicConstructor.subst(m5gpuIop) + exec_output += PredOpExecute.subst(m5gpuIop) + }}; diff -r 157e8df00aa5 -r a9b64c93ebd8 src/arch/x86/isa/decoder/two_byte_opcodes.isa --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa Wed Dec 02 17:26:57 2015 -0600 +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa Wed Dec 02 17:26:57 2015 -0600 @@ -216,6 +216,9 @@ 0x5b: m5_work_end({{ PseudoInst::workend(xc->tcBase(), Rdi, Rsi); }}, IsNonSpeculative); + 0x5c: m5_gpu({{ + PseudoInst::gpu(xc->tcBase(), Rdi, Rsi); + }}, IsNonSpeculative, IsSerializeAfter); default: Inst::UD2(); } } diff -r 157e8df00aa5 -r a9b64c93ebd8 src/sim/pseudo_inst.cc --- a/src/sim/pseudo_inst.cc Wed Dec 02 17:26:57 2015 -0600 +++ b/src/sim/pseudo_inst.cc Wed Dec 02 17:26:57 2015 -0600 @@ -706,4 +706,10 @@ } } +void +gpu(ThreadContext *tc, uint64_t param1, uint64_t param2) +{ + panic("gpu pseudo instruction not yet defined"); +} + } // namespace PseudoInst diff -r 157e8df00aa5 -r a9b64c93ebd8 src/sim/pseudo_inst.hh --- a/src/sim/pseudo_inst.hh Wed Dec 02 17:26:57 2015 -0600 +++ b/src/sim/pseudo_inst.hh Wed Dec 02 17:26:57 2015 -0600 @@ -88,6 +88,7 @@ void switchcpu(ThreadContext *tc); void workbegin(ThreadContext *tc, uint64_t workid, uint64_t threadid); void workend(ThreadContext *tc, uint64_t workid, uint64_t threadid); +void gpu(ThreadContext *tc, uint64_t param1, uint64_t param2); } // namespace PseudoInst diff -r 157e8df00aa5 -r a9b64c93ebd8 util/m5/Makefile.x86 --- a/util/m5/Makefile.x86 Wed Dec 02 17:26:57 2015 -0600 +++ b/util/m5/Makefile.x86 Wed Dec 02 17:26:57 2015 -0600 @@ -32,6 +32,7 @@ LD=ld CFLAGS=-O2 -DM5OP_ADDR=0xFFFF0000 +LDFLAGS=-static OBJS=m5.o m5op_x86.o all: m5 @@ -43,7 +44,7 @@ $(CC) $(CFLAGS) -o $@ -c $< m5: $(OBJS) - $(CC) -o $@ $(OBJS) + $(CC) $(LDFLAGS) -o $@ $(OBJS) clean: rm -f *.o m5 diff -r 157e8df00aa5 -r a9b64c93ebd8 util/m5/m5.c --- a/util/m5/m5.c Wed Dec 02 17:26:57 2015 -0600 +++ b/util/m5/m5.c Wed Dec 02 17:26:57 2015 -0600 @@ -101,32 +101,32 @@ } 
} -int -write_file(const char *filename) -{ - fprintf(stderr, "opening %s\n", filename); - int src_fid = open(filename, O_RDONLY); - - if (src_fid < 0) { - fprintf(stderr, "error opening %s\n", filename); - return; - } - - char buf[256*1024]; - int offset = 0; - int len; - int bytes = 0; - - memset(buf, 0, sizeof(buf)); - - while ((len = read(src_fid, buf, sizeof(buf))) > 0) { - bytes += m5_writefile(buf, len, offset, filename); - offset += len; - } - fprintf(stderr, "written %d bytes\n", bytes); - - close(src_fid); -} +//int +//write_file(const char *filename) +//{ +// fprintf(stderr, "opening %s\n", filename); +// int src_fid = open(filename, O_RDONLY); +// +// if (src_fid < 0) { +// fprintf(stderr, "error opening %s\n", filename); +// return; +// } +// +// char buf[256*1024]; +// int offset = 0; +// int len; +// int bytes = 0; +// +// memset(buf, 0, sizeof(buf)); +// +// while ((len = read(src_fid, buf, sizeof(buf))) > 0) { +// bytes += m5_writefile(buf, len, offset, filename); +// offset += len; +// } +// fprintf(stderr, "written %d bytes\n", bytes); +// +// close(src_fid); +//} void do_exit(int argc, char *argv[]) @@ -183,16 +183,16 @@ read_file(STDOUT_FILENO); } -void -do_write_file(int argc, char *argv[]) -{ - if (argc != 1) - usage(); - - const char *filename = argv[0]; - - write_file(filename); -} +//void +//do_write_file(int argc, char *argv[]) +//{ +// if (argc != 1) +// usage(); +// +// const char *filename = argv[0]; +// +// write_file(filename); +//} void do_exec_file(int argc, char *argv[]) @@ -255,6 +255,16 @@ (param >> 12) & 0xfff, (param >> 0) & 0xfff); } +void +do_gpu(int argc, char *argv[]) +{ + if (argc != 0) + usage(); + + // @TODO: Figure out params that need to be passed + m5_gpu(); +} + #ifdef linux void do_pin(int argc, char *argv[]) @@ -294,12 +304,13 @@ { "dumpstats", do_dump_stats, "[delay [period]]" }, { "dumpresetstats", do_dump_reset_stats, "[delay [period]]" }, { "readfile", do_read_file, "" }, - { "writefile", do_write_file, "" }, +// { "writefile", do_write_file, "" }, { "execfile", do_exec_file, "" }, { "checkpoint", do_checkpoint, "[delay [period]]" }, { "loadsymbol", do_load_symbol, "
" }, { "initparam", do_initparam, "" }, { "sw99param", do_sw99param, "" }, + { "gpu", do_gpu, "" }, #ifdef linux { "pin", do_pin, " [args ...]" } #endif diff -r 157e8df00aa5 -r a9b64c93ebd8 util/m5/m5op.h --- a/util/m5/m5op.h Wed Dec 02 17:26:57 2015 -0600 +++ b/util/m5/m5op.h Wed Dec 02 17:26:57 2015 -0600 @@ -54,13 +54,14 @@ void m5_dump_stats(uint64_t ns_delay, uint64_t ns_period); void m5_dumpreset_stats(uint64_t ns_delay, uint64_t ns_period); uint64_t m5_readfile(void *buffer, uint64_t len, uint64_t offset); -uint64_t m5_writefile(void *buffer, uint64_t len, uint64_t offset, const char *filename); +//uint64_t m5_writefile(void *buffer, uint64_t len, uint64_t offset, const char *filename); void m5_debugbreak(void); void m5_switchcpu(void); void m5_addsymbol(uint64_t addr, char *symbol); void m5_panic(void); void m5_work_begin(uint64_t workid, uint64_t threadid); void m5_work_end(uint64_t workid, uint64_t threadid); +void m5_gpu(); // These operations are for critical path annotation void m5a_bsm(char *sm, const void *id, int flags); diff -r 157e8df00aa5 -r a9b64c93ebd8 util/m5/m5op_arm.S --- a/util/m5/m5op_arm.S Wed Dec 02 17:26:57 2015 -0600 +++ b/util/m5/m5op_arm.S Wed Dec 02 17:26:57 2015 -0600 @@ -89,6 +89,7 @@ SIMPLE_OP(m5_panic, panic_func, 0) SIMPLE_OP(m5_work_begin, work_begin_func, 0) SIMPLE_OP(m5_work_end, work_end_func, 0) +SIMPLE_OP(m5_gpu, gpu_func, 0) SIMPLE_OP(m5a_bsm, annotate_func, an_bsm) SIMPLE_OP(m5a_esm, annotate_func, an_esm) diff -r 157e8df00aa5 -r a9b64c93ebd8 util/m5/m5op_x86.S --- a/util/m5/m5op_x86.S Wed Dec 02 17:26:57 2015 -0600 +++ b/util/m5/m5op_x86.S Wed Dec 02 17:26:57 2015 -0600 @@ -83,3 +83,4 @@ TWO_BYTE_OP(m5_panic, panic_func) TWO_BYTE_OP(m5_work_begin, work_begin_func) TWO_BYTE_OP(m5_work_end, work_end_func) +TWO_BYTE_OP(m5_gpu, gpu_func) diff -r 157e8df00aa5 -r a9b64c93ebd8 util/m5/m5ops.h --- a/util/m5/m5ops.h Wed Dec 02 17:26:57 2015 -0600 +++ b/util/m5/m5ops.h Wed Dec 02 17:26:57 2015 -0600 @@ -61,6 +61,7 @@ #define work_begin_func 0x5a #define work_end_func 0x5b +#define gpu_func 0x5c // These operations are for critical path annotation #define annotate_func 0x55 # HG changeset patch # User Joel Hestness # Date 1449098817 21600 # Node ID a0e6ac8d1c02bd0ba50992af8e0d63797257ee26 # Parent a9b64c93ebd89aeea0a70ad7e02dd32827015cb4 Setup gem5 magic instructions to handle CUDA calls and build out the coordination with an updated version of libcuda * * * This patch should be folded into the full-system GPGPU-Sim functionality patch after appropriate implementation of sending GPU ST data into Ruby through write requests. * * * This patch should be folded into the full-system GPGPU-Sim functionality patch after appropriate implementation of requesting GPU LD data from Ruby through read requests. * * * Fold into the gpgpu-sim glue patch * * * Updating to apply patches to changeset 8929: Update configuration files to use the new option parser organization * * * Changes to the stream processor array to be folded into fs_functionality * * * Merge the instruction memory access in GPGPU-Sim into the gem5 memory hierarchy * * * Fold into merge_inst_memory patch * * * Fixup uninitialized values in SP array * * * Fix the issue of multiple buffered writes to the same data. 
Ordering is defined such that the last write received by the buffer is the one written to the line. * * * Add GPU syscall for registering device memory * * * GPU Magic Instruction: Modify header for pointer handling For the inclusion of ARM 32-bit, we will want to pass a single pointer type to gem5-gpu through the m5_gpu pseudo-instruction. To avoid separate handling for 32- vs. 64-bit architectures, use a uint64_t in the m5op header rather than the gpusyscall_t* pointer, which can have different sizes. NOTE: Using this updated function header requires the updated version of libcuda, though the change is backward compatible so that old binaries still execute correctly. diff -r a9b64c93ebd8 -r a0e6ac8d1c02 src/sim/pseudo_inst.cc --- a/src/sim/pseudo_inst.cc Wed Dec 02 17:26:57 2015 -0600 +++ b/src/sim/pseudo_inst.cc Wed Dec 02 17:26:57 2015 -0600 @@ -73,6 +73,12 @@ #include "sim/system.hh" #include "sim/vptr.hh" +//#include "../../gpgpu-sim/src/gem5/gpu_syscalls.hh" +struct gpusyscall; +typedef struct gpusyscall gpusyscall_t; +typedef uint64_t (*cudaFunc_t)(ThreadContext *, gpusyscall_t *); +extern cudaFunc_t gpgpu_funcs[]; + using namespace std; using namespace Stats; @@ -707,9 +713,14 @@ } void -gpu(ThreadContext *tc, uint64_t param1, uint64_t param2) +gpu(ThreadContext *tc, uint64_t gpusysno, uint64_t call_params) { - panic("gpu pseudo instruction not yet defined"); + if (gpusysno > 83) { + warn("Ignoring gpu syscall %d\n", gpusysno); + return; + } + + gpgpu_funcs[gpusysno](tc, (gpusyscall_t*)call_params); } } // namespace PseudoInst diff -r a9b64c93ebd8 -r a0e6ac8d1c02 src/sim/pseudo_inst.hh --- a/src/sim/pseudo_inst.hh Wed Dec 02 17:26:57 2015 -0600 +++ b/src/sim/pseudo_inst.hh Wed Dec 02 17:26:57 2015 -0600 @@ -88,7 +88,7 @@ void switchcpu(ThreadContext *tc); void workbegin(ThreadContext *tc, uint64_t workid, uint64_t threadid); void workend(ThreadContext *tc, uint64_t workid, uint64_t threadid); -void gpu(ThreadContext *tc, uint64_t param1, uint64_t param2); +void gpu(ThreadContext *tc, uint64_t gpusysno, uint64_t call_params); } // namespace PseudoInst diff -r a9b64c93ebd8 -r a0e6ac8d1c02 util/m5/m5.c --- a/util/m5/m5.c Wed Dec 02 17:26:57 2015 -0600 +++ b/util/m5/m5.c Wed Dec 02 17:26:57 2015 -0600 @@ -258,11 +258,12 @@ void do_gpu(int argc, char *argv[]) { - if (argc != 0) + if (argc < 1) usage(); - // @TODO: Figure out params that need to be passed - m5_gpu(); + uint64_t* callno = (uint64_t*)argv[0]; + + m5_gpu(*callno, NULL); } #ifdef linux diff -r a9b64c93ebd8 -r a0e6ac8d1c02 util/m5/m5op.h --- a/util/m5/m5op.h Wed Dec 02 17:26:57 2015 -0600 +++ b/util/m5/m5op.h Wed Dec 02 17:26:57 2015 -0600 @@ -61,7 +61,7 @@ void m5_panic(void); void m5_work_begin(uint64_t workid, uint64_t threadid); void m5_work_end(uint64_t workid, uint64_t threadid); -void m5_gpu(); +void m5_gpu(uint64_t __gpusysno, uint64_t call_params); // These operations are for critical path annotation void m5a_bsm(char *sm, const void *id, int flags); # HG changeset patch # User Joel Hestness # Date 1449098817 21600 # Node ID a3b87764d17ed733620870e4374f892b4212dfe3 # Parent a0e6ac8d1c02bd0ba50992af8e0d63797257ee26 Regressions: Add gem5 bits and pieces for gem5-gpu regressions Running gem5-style regressions on gem5-gpu requires updating a few parts of the gem5 regress infrastructure. Specifically, (1) add gem5-gpu as a test type in the tests SConscript, (2) add the appropriate GPU stats in diff-out to make sure we check them, and (3) add symlinks to the regression config scripts.
Notes: To run these tests requires two gem5-gpu patches that include the other scripts and initial set of regression tests. diff -r a0e6ac8d1c02 -r a3b87764d17e tests/SConscript --- a/tests/SConscript Wed Dec 02 17:26:57 2015 -0600 +++ b/tests/SConscript Wed Dec 02 17:26:57 2015 -0600 @@ -347,6 +347,9 @@ 'rubytest', 'memtest', 'memtest-filter', 'tgen-simple-mem', 'tgen-dram-ctrl'] +if env['GPGPU_SIM']: + configs.append('gem5-gpu') + if env['PROTOCOL'] != 'None': if env['PROTOCOL'] == 'MI_example': configs += [c + "-ruby" for c in configs] diff -r a0e6ac8d1c02 -r a3b87764d17e tests/configs/gem5-gpu-ruby.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/configs/gem5-gpu-ruby.py Wed Dec 02 17:26:57 2015 -0600 @@ -0,0 +1,1 @@ +../../../gem5-gpu/tests/configs/gem5-gpu-ruby.py \ No newline at end of file diff -r a0e6ac8d1c02 -r a3b87764d17e tests/diff-out --- a/tests/diff-out Wed Dec 02 17:26:57 2015 -0600 +++ b/tests/diff-out Wed Dec 02 17:26:57 2015 -0600 @@ -202,7 +202,8 @@ 'sim_ops', 'sim_ticks', 'host_inst_rate', - 'host_mem_usage' + 'host_mem_usage', + 'inst_counts' ); $key_stat_pattern = join('|', @key_stat_list); diff -r a0e6ac8d1c02 -r a3b87764d17e tests/quick/se_gpu --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/quick/se_gpu Wed Dec 02 17:26:57 2015 -0600 @@ -0,0 +1,1 @@ +../../../gem5-gpu/tests/quick/se_gpu/ \ No newline at end of file # HG changeset patch # User Joel Hestness # Date 1449098817 21600 # Node ID 8cf19b2fd1d8ab1d6ca517f86e30dbb312b35834 # Parent a3b87764d17ed733620870e4374f892b4212dfe3 DirectoryMemory: Fix the number of directory bits calculation diff -r a3b87764d17e -r 8cf19b2fd1d8 src/mem/ruby/structures/DirectoryMemory.cc --- a/src/mem/ruby/structures/DirectoryMemory.cc Wed Dec 02 17:26:57 2015 -0600 +++ b/src/mem/ruby/structures/DirectoryMemory.cc Wed Dec 02 17:26:57 2015 -0600 @@ -98,8 +98,8 @@ if (m_num_directories_bits == 0) return 0; - uint64 ret = address.bitSelect(m_numa_high_bit - m_num_directories_bits + 1, - m_numa_high_bit); + uint64 ret = address.shiftLowOrderBits(m_numa_high_bit - m_num_directories_bits + 1) % m_num_directories; + return ret; } # HG changeset patch # User Joel Hestness # Date 1449098818 21600 # Node ID cee4758c8ec8c8d8c7f5f9af3f6e199bbc6bf683 # Parent 8cf19b2fd1d8ab1d6ca517f86e30dbb312b35834 imported patch common/fix_memory_controller_timings diff -r 8cf19b2fd1d8 -r cee4758c8ec8 src/mem/ruby/structures/RubyMemoryControl.cc --- a/src/mem/ruby/structures/RubyMemoryControl.cc Wed Dec 02 17:26:57 2015 -0600 +++ b/src/mem/ruby/structures/RubyMemoryControl.cc Wed Dec 02 17:26:58 2015 -0600 @@ -547,7 +547,8 @@ bank, m_event.scheduled() ? 'Y':'N'); if (req->m_msgptr) { // don't enqueue L3 writebacks - enqueueToDirectory(req, Cycles(m_mem_ctl_latency + m_mem_fixed_delay)); + enqueueToDirectory(req, Cycles(m_mem_ctl_latency + m_mem_fixed_delay + + m_bank_busy_time + m_basic_bus_busy_time)); } m_oldRequest[bank] = 0; markTfaw(rank); # HG changeset patch # User Joel Hestness # Date 1449098818 21600 # Node ID 303eda09549df0943f83e70ddf55c197d2168f58 # Parent cee4758c8ec8c8d8c7f5f9af3f6e199bbc6bf683 Ruby Directory Memory: Update to map/index for GPU In order to have a split memory hierarchy for the CPU and GPU, the device directories (GPU directories) need to be aware of the mapping of memory across different device directories This patch does not affect the standard gem5 functionality of the directory controller. 
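For illustration only, a minimal sketch of how the device_directory flag added below to DirectoryMemory.py might be used from a Ruby protocol config. The directory counts, sizes, and numa_bit value are placeholder assumptions, not values taken from this patch series:

    from m5.objects import RubyDirectoryMemory

    num_cpu_dirs = 2   # placeholder counts for illustration
    num_gpu_dirs = 2
    numa_bit = 6       # placeholder; normally derived from block size and directory count

    # Host-memory directories keep the default device_directory = False.
    cpu_dirs = [RubyDirectoryMemory(version = i, size = '512MB',
                                    numa_high_bit = numa_bit)
                for i in range(num_cpu_dirs)]

    # GPU (device) directories are flagged so that addresses above the host
    # directories' total size are mapped into the device segment.
    gpu_dirs = [RubyDirectoryMemory(version = num_cpu_dirs + i, size = '128MB',
                                    numa_high_bit = numa_bit,
                                    device_directory = True)
                for i in range(num_gpu_dirs)]
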
diff -r cee4758c8ec8 -r 303eda09549d src/mem/ruby/SConscript --- a/src/mem/ruby/SConscript Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/ruby/SConscript Wed Dec 02 17:26:58 2015 -0600 @@ -40,6 +40,7 @@ DebugFlag('ProtocolTrace') DebugFlag('RubyCache') DebugFlag('RubyCacheTrace') +DebugFlag('RubyDirectoryMemory') DebugFlag('RubyDma') DebugFlag('RubyGenerated') DebugFlag('RubyMemory') diff -r cee4758c8ec8 -r 303eda09549d src/mem/ruby/structures/DirectoryMemory.cc --- a/src/mem/ruby/structures/DirectoryMemory.cc Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/ruby/structures/DirectoryMemory.cc Wed Dec 02 17:26:58 2015 -0600 @@ -28,6 +28,7 @@ #include "base/intmath.hh" #include "debug/RubyCache.hh" +#include "debug/RubyDirectoryMemory.hh" #include "debug/RubyStats.hh" #include "mem/ruby/slicc_interface/RubySlicc_Util.hh" #include "mem/ruby/structures/DirectoryMemory.hh" @@ -40,6 +41,10 @@ uint64_t DirectoryMemory::m_total_size_bytes = 0; int DirectoryMemory::m_numa_high_bit = 0; +int DirectoryMemory::m_num_dev_directories = 0; +uint64_t DirectoryMemory::m_device_segment_base = 0; +int DirectoryMemory::m_num_dev_directories_bits = 0; + DirectoryMemory::DirectoryMemory(const Params *p) : SimObject(p) { @@ -50,6 +55,7 @@ m_use_map = p->use_map; m_map_levels = p->map_levels; m_numa_high_bit = p->numa_high_bit; + m_device_directory = p->device_directory; } void @@ -67,8 +73,14 @@ m_ram = g_system_ptr->getMemoryVector(); } - m_num_directories++; - m_num_directories_bits = ceilLog2(m_num_directories); + if (m_device_directory) { + m_num_dev_directories++; + m_num_dev_directories_bits = ceilLog2(m_num_dev_directories); + } else { + m_num_directories++; + m_num_directories_bits = ceilLog2(m_num_directories); + m_device_segment_base += m_size_bytes; + } m_total_size_bytes += m_size_bytes; if (m_numa_high_bit == 0) { @@ -92,13 +104,25 @@ } } +#define DEV_DIR_BITS 8 + uint64 DirectoryMemory::mapAddressToDirectoryVersion(PhysAddress address) { - if (m_num_directories_bits == 0) - return 0; - - uint64 ret = address.shiftLowOrderBits(m_numa_high_bit - m_num_directories_bits + 1) % m_num_directories; + uint64 ret; + if (m_num_dev_directories > 0) { + Addr addr = address.getAddress(); + if (addr >= m_device_segment_base) { + PhysAddress relative_addr; + relative_addr.setAddress(addr - m_device_segment_base); + ret = relative_addr.shiftLowOrderBits(m_numa_high_bit - m_num_dev_directories_bits + 1) % m_num_dev_directories; + ret += m_num_directories; + } else { + ret = address.shiftLowOrderBits(m_numa_high_bit - m_num_directories_bits + 1) % m_num_directories; + } + } else { + ret = address.shiftLowOrderBits(m_numa_high_bit - m_num_directories_bits + 1) % m_num_directories; + } return ret; } @@ -114,14 +138,36 @@ DirectoryMemory::mapAddressToLocalIdx(PhysAddress address) { uint64 ret; - if (m_num_directories_bits > 0) { - ret = address.bitRemove(m_numa_high_bit - m_num_directories_bits + 1, - m_numa_high_bit); + if (m_num_dev_directories > 0) { + if (address.getAddress() >= m_device_segment_base) { + PhysAddress relative_address; + relative_address.setAddress(address.getAddress() - m_device_segment_base); + if (m_num_dev_directories_bits > 0) { + ret = relative_address.bitRemove(m_numa_high_bit - m_num_dev_directories_bits + 1, + m_numa_high_bit); + } else { + ret = relative_address.getAddress(); + } + } else { + if (m_num_directories_bits > 0) { + ret = address.bitRemove(m_numa_high_bit - m_num_directories_bits + 1, + m_numa_high_bit); + } else { + ret = address.getAddress(); + } + } } else { - ret = 
address.getAddress(); + if (m_num_directories_bits > 0) { + ret = address.bitRemove(m_numa_high_bit - m_num_directories_bits + 1, + m_numa_high_bit); + } else { + ret = address.getAddress(); + } } - return ret >> (RubySystem::getBlockSizeBits()); + ret >>= (RubySystem::getBlockSizeBits()); + DPRINTF(RubyDirectoryMemory, "%#x, %u\n", address.getAddress(), ret); + return ret; } AbstractEntry* diff -r cee4758c8ec8 -r 303eda09549d src/mem/ruby/structures/DirectoryMemory.hh --- a/src/mem/ruby/structures/DirectoryMemory.hh Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/ruby/structures/DirectoryMemory.hh Wed Dec 02 17:26:58 2015 -0600 @@ -91,6 +91,11 @@ SparseMemory* m_sparseMemory; bool m_use_map; int m_map_levels; + + bool m_device_directory; + static int m_num_dev_directories; + static int m_num_dev_directories_bits; + static uint64_t m_device_segment_base; }; inline std::ostream& diff -r cee4758c8ec8 -r 303eda09549d src/mem/ruby/structures/DirectoryMemory.py --- a/src/mem/ruby/structures/DirectoryMemory.py Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/ruby/structures/DirectoryMemory.py Wed Dec 02 17:26:58 2015 -0600 @@ -42,3 +42,4 @@ # the default value of the numa high bit is specified in the command line # option and must be passed into the directory memory sim object numa_high_bit = Param.Int("numa high bit") + device_directory = Param.Bool(False, "this directory is for a device") # HG changeset patch # User Lena Olson # Date 1449098818 21600 # Node ID c64a1d516f200310d184be2ee26ee490f3c5c126 # Parent 303eda09549df0943f83e70ddf55c197d2168f58 Ruby: Enable slicc to get data from RubyRequest This allows protocols to get the data out of the ruby request before the read/writeCallback function is called. This is useful when implementing write-through protocols where the data is needed before the transaction is complete. diff -r 303eda09549d -r c64a1d516f20 src/mem/protocol/RubySlicc_Types.sm --- a/src/mem/protocol/RubySlicc_Types.sm Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/protocol/RubySlicc_Types.sm Wed Dec 02 17:26:58 2015 -0600 @@ -123,6 +123,7 @@ int Size, desc="size in bytes of access"; PrefetchBit Prefetch, desc="Is this a prefetch request"; int contextId, desc="this goes away but must be replace with Nilay"; + void writeData(DataBlock); } structure(AbstractEntry, primitive="yes", external = "yes") { diff -r 303eda09549d -r c64a1d516f20 src/mem/ruby/slicc_interface/RubyRequest.hh --- a/src/mem/ruby/slicc_interface/RubyRequest.hh Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh Wed Dec 02 17:26:58 2015 -0600 @@ -36,6 +36,7 @@ #include "mem/protocol/RubyAccessMode.hh" #include "mem/protocol/RubyRequestType.hh" #include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/DataBlock.hh" class RubyRequest : public Message { @@ -81,6 +82,12 @@ const int& getSize() const { return m_Size; } const PrefetchBit& getPrefetch() const { return m_Prefetch; } + void + writeData(DataBlock& block) const + { + block.setData(data, m_PhysicalAddress.getOffset(), m_Size); + } + void print(std::ostream& out) const; bool functionalRead(Packet *pkt); bool functionalWrite(Packet *pkt); # HG changeset patch # User Lena Olson # Date 1449098818 21600 # Node ID f3bb1f41536bce09c2200107e175e3a019e88086 # Parent c64a1d516f200310d184be2ee26ee490f3c5c126 Mem: Add and implement FlushAll command in Ruby. A FlushAll message requests the receiving controller to flush everything from its cache. This patch: - Adds a FlushAllReq/Resp MemCmd. 
- FLUSHALL RubyRequestType - Flash invalidate function to the Ruby cache controllers. The flash invalidate function invalidates all lines which are resident in the cache. There is a check to be sure that there are no lines in the busy or r/w state, although this may need to change in the future to support a more general flash invalidate. diff -r c64a1d516f20 -r f3bb1f41536b src/mem/packet.cc --- a/src/mem/packet.cc Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/packet.cc Wed Dec 02 17:26:58 2015 -0600 @@ -174,6 +174,11 @@ /* Invalidation Request */ { SET3(NeedsExclusive, IsInvalidate, IsRequest), InvalidCmd, "InvalidationReq" }, + /* FlushAll Request */ + { SET4(IsRequest, NeedsResponse, IsFlush, IsInvalidate), FlushAllResp, + "FlushAllReq"}, + /* FlushAll Response */ + { SET3(IsResponse, IsFlush, IsInvalidate), InvalidCmd, "FlushAllResp"}, }; bool diff -r c64a1d516f20 -r f3bb1f41536b src/mem/packet.hh --- a/src/mem/packet.hh Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/packet.hh Wed Dec 02 17:26:58 2015 -0600 @@ -121,6 +121,8 @@ FlushReq, //request for a cache flush FlushResp, InvalidationReq, // request for address to be invalidated from lsq + FlushAllReq, // Flush entire cache request + FlushAllResp, NUM_MEM_CMDS }; diff -r c64a1d516f20 -r f3bb1f41536b src/mem/protocol/RubySlicc_Exports.sm --- a/src/mem/protocol/RubySlicc_Exports.sm Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/protocol/RubySlicc_Exports.sm Wed Dec 02 17:26:58 2015 -0600 @@ -135,6 +135,7 @@ COMMIT, desc="Commit version"; NULL, desc="Invalid request type"; FLUSH, desc="Flush request type"; + FLUSHALL, desc="Flush everything from the cache"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { diff -r c64a1d516f20 -r f3bb1f41536b src/mem/protocol/RubySlicc_Types.sm --- a/src/mem/protocol/RubySlicc_Types.sm Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/protocol/RubySlicc_Types.sm Wed Dec 02 17:26:58 2015 -0600 @@ -153,6 +153,7 @@ void setMRU(Address); void recordRequestType(CacheRequestType); bool checkResourceAvailable(CacheResourceType, Address); + void flashInvalidate(); Scalar demand_misses; Scalar demand_hits; diff -r c64a1d516f20 -r f3bb1f41536b src/mem/ruby/structures/CacheMemory.cc --- a/src/mem/ruby/structures/CacheMemory.cc Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/ruby/structures/CacheMemory.cc Wed Dec 02 17:26:58 2015 -0600 @@ -323,6 +323,24 @@ } void +CacheMemory::flashInvalidate() +{ + // NOTE: It may make sense to invalidate Read_Write data but the assert + // is added for safety. 
+ for (int i = 0; i < m_cache_num_sets; i++) { + for (int j = 0; j < m_cache_assoc; j++) { + if (m_cache[i][j] == NULL) { + continue; + } + assert(m_cache[i][j]->m_Permission != AccessPermission_Busy); + assert(m_cache[i][j]->m_Permission != AccessPermission_Read_Write); + m_cache[i][j]->changePermission(AccessPermission_NotPresent); + } + } + m_tag_index.clear(); +} + +void CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const { uint64 warmedUpBlocks = 0; diff -r c64a1d516f20 -r f3bb1f41536b src/mem/ruby/structures/CacheMemory.hh --- a/src/mem/ruby/structures/CacheMemory.hh Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/ruby/structures/CacheMemory.hh Wed Dec 02 17:26:58 2015 -0600 @@ -102,6 +102,8 @@ void clearLocked (const Address& addr); bool isLocked (const Address& addr, int context); + void flashInvalidate(); + // Print cache contents void print(std::ostream& out) const; void printData(std::ostream& out) const; diff -r c64a1d516f20 -r f3bb1f41536b src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:26:58 2015 -0600 @@ -235,7 +235,8 @@ (request_type == RubyRequestType_Store_Conditional) || (request_type == RubyRequestType_Locked_RMW_Read) || (request_type == RubyRequestType_Locked_RMW_Write) || - (request_type == RubyRequestType_FLUSH)) { + (request_type == RubyRequestType_FLUSH) || + (request_type == RubyRequestType_FLUSHALL)) { // Check if there is any outstanding read request for the same // cache line. @@ -445,7 +446,8 @@ (request->m_type == RubyRequestType_Store_Conditional) || (request->m_type == RubyRequestType_Locked_RMW_Read) || (request->m_type == RubyRequestType_Locked_RMW_Write) || - (request->m_type == RubyRequestType_FLUSH)); + (request->m_type == RubyRequestType_FLUSH) || + (request->m_type == RubyRequestType_FLUSHALL)); // // For Alpha, properly handle LL, SC, and write requests with respect to @@ -485,7 +487,8 @@ markRemoved(); assert((request->m_type == RubyRequestType_LD) || - (request->m_type == RubyRequestType_IFETCH)); + (request->m_type == RubyRequestType_IFETCH) || + (request->m_type == RubyRequestType_FLUSHALL)); hitCallback(request, data, true, mach, externalHit, initialRequestTime, forwardRequestTime, firstResponseTime); @@ -652,7 +655,11 @@ // primary_type = secondary_type = RubyRequestType_ST; } else if (pkt->isFlush()) { - primary_type = secondary_type = RubyRequestType_FLUSH; + if (pkt->cmd == MemCmd::FlushAllReq) { + primary_type = secondary_type = RubyRequestType_FLUSHALL; + } else { + primary_type = secondary_type = RubyRequestType_FLUSH; + } } else { panic("Unsupported ruby packet type\n"); } # HG changeset patch # User Joel Hestness # Date 1449098818 21600 # Node ID 01ee78aff9672274cc1fdd177b0034aad619fa37 # Parent f3bb1f41536bce09c2200107e175e3a019e88086 mem: Add a memory command for fences To communicate fence requests between the CudaCore and the ShaderLSQ, we need to add a packet MemCmd type. This patch will be used by follow-on patches to gem5-gpu. 
diff -r f3bb1f41536b -r 01ee78aff967 src/mem/packet.cc --- a/src/mem/packet.cc Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/packet.cc Wed Dec 02 17:26:58 2015 -0600 @@ -179,6 +179,10 @@ "FlushAllReq"}, /* FlushAll Response */ { SET3(IsResponse, IsFlush, IsInvalidate), InvalidCmd, "FlushAllResp"}, + /* Fence Request */ + { SET2(IsRequest, NeedsResponse), FenceResp, "FenceReq"}, + /* Fence Response */ + { SET1(IsResponse), InvalidCmd, "FenceResp"}, }; bool diff -r f3bb1f41536b -r 01ee78aff967 src/mem/packet.hh --- a/src/mem/packet.hh Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/packet.hh Wed Dec 02 17:26:58 2015 -0600 @@ -123,6 +123,8 @@ InvalidationReq, // request for address to be invalidated from lsq FlushAllReq, // Flush entire cache request FlushAllResp, + FenceReq, // Enforce memory access ordering based on pkt contents + FenceResp, // Fence operation has completed NUM_MEM_CMDS }; # HG changeset patch # User Joel Hestness # Date 1449098819 21600 # Node ID cc0e36279258b7abec4962aaf2f472a9aa9d1820 # Parent 01ee78aff9672274cc1fdd177b0034aad619fa37 Ruby Memory Controller: Remove refresh deadline With longer latency memory requests that come with more accurate modeling of the bus contention, the prior refresh deadlines no longer make sense. Add a warning when refresh time exceeds 500 cycles, but do not kill simulation. TODO: If accurate refresh modeling becomes desirable, this should be fixed to ensure refresh happens promptly. diff -r 01ee78aff967 -r cc0e36279258 src/mem/ruby/structures/RubyMemoryControl.cc --- a/src/mem/ruby/structures/RubyMemoryControl.cc Wed Dec 02 17:26:58 2015 -0600 +++ b/src/mem/ruby/structures/RubyMemoryControl.cc Wed Dec 02 17:26:59 2015 -0600 @@ -597,7 +597,7 @@ m_refresh_count = m_refresh_period_system; // Are we overrunning our ability to refresh? - assert(m_need_refresh < 10); + if (m_need_refresh >= 500) warn_once("Refresh delayed more than 500 cycles!\n"); m_need_refresh++; } # HG changeset patch # User Joel Hestness # Date 1449098819 21600 # Node ID b58aad0daaf4cdb1c09e262d9739fd094ca2fdc9 # Parent cc0e36279258b7abec4962aaf2f472a9aa9d1820 ruby: Generalize the Cluster network This patch adds two generalizations to the Cluster network: 1) In the case that components from one Cluster should be connected to components within another Cluster, we can add those components to both Clusters. This creates a problem if these two partially-connected Clusters are included as sub-Clusters of another Cluster, because the recursive definition causes the shared portions of the sub-Clusters to be traversed for each path entering the shared portions. Add a check to see if the sub-Cluster has already been instantiated, and if so, simply return to the super-Cluster that is calling the makeTopology function. 2) In the case that multiple sub-Clusters should be connected but the router within one should not be connected to the top-level Cluster (e.g. a tree hierarchy with shared components between subtrees), the Cluster would, by default, add a link between the sub-Cluster router and the top-level Cluster. Add a variable to specify whether the sub-Cluster's router should be connected to the top-level Cluster's router. Default the choice to True. 
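For illustration, a minimal sketch (not code from gem5-gpu itself) of how the two generalizations added below can combine; cpu_cntrls, gpu_cntrls, and dir_cntrls are assumed to be controller lists built elsewhere in the config:

    from topologies.Cluster import Cluster

    cpuCluster = Cluster(intBW = 32, extBW = 32)
    for c in cpu_cntrls:
        cpuCluster.add(c)

    gpuCluster = Cluster(intBW = 32, extBW = 32)
    for c in gpu_cntrls:
        gpuCluster.add(c)

    # Directory cluster shared by both subtrees: adding it to both sub-Clusters
    # is now safe because the "already instantiated" check makes the second
    # traversal return early, so it is built once but linked to both routers.
    dirCluster = Cluster(intBW = 32)
    for c in dir_cntrls:
        dirCluster.add(c)
    cpuCluster.add(dirCluster)
    gpuCluster.add(dirCluster)

    # The GPU side reaches the rest of the network only through dirCluster, so
    # suppress the default link from the top-level router to gpuCluster's router.
    gpuCluster.disableConnectToParent()

    mainCluster = Cluster(intBW = 32)
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

With this construction, dirCluster is instantiated during the cpuCluster traversal, reused rather than rebuilt during the gpuCluster traversal, and gpuCluster gets no direct link to mainCluster's router.
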
diff -r cc0e36279258 -r b58aad0daaf4 configs/topologies/Cluster.py --- a/configs/topologies/Cluster.py Wed Dec 02 17:26:59 2015 -0600 +++ b/configs/topologies/Cluster.py Wed Dec 02 17:26:59 2015 -0600 @@ -69,14 +69,29 @@ self.extBW = extBW self.intLatency = intLatency self.extLatency = extLatency + self.connectToParent = True def add(self, node): self.nodes.append(node) + # Since Clusters may be recursively defined, it may be desirable to nest + # Clusters without connecting them to higher-level parts of the network + # Use disableConnectToParent() to keep a Cluster from being connected + # to the router of a Cluster that contains it. + def getConnectToParent(self): + return self.connectToParent + + def disableConnectToParent(self): + self.connectToParent = False + def makeTopology(self, options, network, IntLink, ExtLink, Router): """ Recursively make all of the links and routers """ + # If this sub-Cluster has already been constructed + if self.router is not None: + return + # make a router to connect all of the nodes self.router = Router(router_id=self.num_routers()) network.routers.append(self.router) @@ -85,24 +100,25 @@ if type(node) == Cluster: node.makeTopology(options, network, IntLink, ExtLink, Router) - # connect this cluster to the router - link = IntLink(link_id=self.num_int_links(), node_a=self.router, - node_b=node.router) + if node.getConnectToParent(): + # connect this cluster to the router + link = IntLink(link_id=self.num_int_links(), + node_a=self.router, node_b=node.router) - if node.extBW: - link.bandwidth_factor = node.extBW + if node.extBW: + link.bandwidth_factor = node.extBW - # if there is an interanl b/w for this node - # and no ext b/w to override - elif self.intBW: - link.bandwidth_factor = self.intBW + # if there is an interanl b/w for this node + # and no ext b/w to override + elif self.intBW: + link.bandwidth_factor = self.intBW - if node.extLatency: - link.latency = node.extLatency - elif self.intLatency: - link.latency = self.intLatency + if node.extLatency: + link.latency = node.extLatency + elif self.intLatency: + link.latency = self.intLatency - network.int_links.append(link) + network.int_links.append(link) else: # node is just a controller, # connect it to the router via a ext_link # HG changeset patch # User Lena Olson # Date 1449098819 21600 # Node ID e4786d785afefce8fa4cb98de330271e5a4fd543 # Parent b58aad0daaf4cdb1c09e262d9739fd094ca2fdc9 Add flag to TLB to optionally bypass L1 Uses bypass option in Ruby to bypass the L1 cache for TLB accesses. This may be useful for systems with a small L1 cache that you do not want polluted by page walks. diff -r b58aad0daaf4 -r e4786d785afe src/arch/x86/X86TLB.py --- a/src/arch/x86/X86TLB.py Wed Dec 02 17:26:59 2015 -0600 +++ b/src/arch/x86/X86TLB.py Wed Dec 02 17:26:59 2015 -0600 @@ -49,6 +49,9 @@ system = Param.System(Parent.any, "system object") num_squash_per_cycle = Param.Unsigned(4, "Number of outstanding walks that can be squashed per cycle") + bypass_l1 = Param.Bool(False, "Bypass the L1 cache when issuing memory \ + accesses for pagetable walks. 
Useful for \ + caches that may hold stale data.") class X86TLB(BaseTLB): type = 'X86TLB' diff -r b58aad0daaf4 -r e4786d785afe src/arch/x86/pagetable_walker.cc --- a/src/arch/x86/pagetable_walker.cc Wed Dec 02 17:26:59 2015 -0600 +++ b/src/arch/x86/pagetable_walker.cc Wed Dec 02 17:26:59 2015 -0600 @@ -577,6 +577,9 @@ entry.vaddr = vaddr; Request::Flags flags = Request::PHYSICAL; + if (walker->bypassL1) { + flags.set(Request::BYPASS_L1); + } if (cr3.pcd) flags.set(Request::UNCACHEABLE); RequestPtr request = new Request(topAddr, dataSize, flags, diff -r b58aad0daaf4 -r e4786d785afe src/arch/x86/pagetable_walker.hh --- a/src/arch/x86/pagetable_walker.hh Wed Dec 02 17:26:59 2015 -0600 +++ b/src/arch/x86/pagetable_walker.hh Wed Dec 02 17:26:59 2015 -0600 @@ -180,6 +180,9 @@ // The number of outstanding walks that can be squashed per cycle. unsigned numSquashable; + // If true, send all memory requests with the bypass L1 flag true + bool bypassL1; + // Wrapper for checking for squashes before starting a translation. void startWalkWrapper(); @@ -207,7 +210,8 @@ MemObject(params), port(name() + ".port", this), funcState(this, NULL, NULL, true), tlb(NULL), sys(params->system), masterId(sys->getMasterId(name())), - numSquashable(params->num_squash_per_cycle) + numSquashable(params->num_squash_per_cycle), + bypassL1(params->bypass_l1) { } }; diff -r b58aad0daaf4 -r e4786d785afe src/mem/request.hh --- a/src/mem/request.hh Wed Dec 02 17:26:59 2015 -0600 +++ b/src/mem/request.hh Wed Dec 02 17:26:59 2015 -0600 @@ -140,6 +140,8 @@ static const FlagsType PF_EXCLUSIVE = 0x02000000; /** The request should be marked as LRU. */ static const FlagsType EVICT_NEXT = 0x04000000; + /** The request should bypass the L1 cache. */ + static const FlagsType BYPASS_L1 = 0x08000000; /** The request should be handled by the generic IPR code (only * valid together with MMAPPED_IPR) */ @@ -651,6 +653,7 @@ bool isClearLL() const { return _flags.isSet(CLEAR_LL); } bool isSecure() const { return _flags.isSet(SECURE); } bool isPTWalk() const { return _flags.isSet(PT_WALK); } + bool isBypassL1() const { return _flags.isSet(BYPASS_L1); } }; #endif // __MEM_REQUEST_HH__ # HG changeset patch # User Lena Olson # Date 1449098819 21600 # Node ID 10ea2c2a71b0890783a59a3b70b8e9365cd76c14 # Parent e4786d785afefce8fa4cb98de330271e5a4fd543 Ruby: Add request type to ruby that bypasses the L1 diff -r e4786d785afe -r 10ea2c2a71b0 src/mem/protocol/RubySlicc_Exports.sm --- a/src/mem/protocol/RubySlicc_Exports.sm Wed Dec 02 17:26:59 2015 -0600 +++ b/src/mem/protocol/RubySlicc_Exports.sm Wed Dec 02 17:26:59 2015 -0600 @@ -136,6 +136,8 @@ NULL, desc="Invalid request type"; FLUSH, desc="Flush request type"; FLUSHALL, desc="Flush everything from the cache"; + LD_Bypass, desc="Load, but bypass the L1"; + ST_Bypass, desc="Store, but bypass the L1"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { diff -r e4786d785afe -r 10ea2c2a71b0 src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:26:59 2015 -0600 +++ b/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:26:59 2015 -0600 @@ -488,6 +488,7 @@ assert((request->m_type == RubyRequestType_LD) || (request->m_type == RubyRequestType_IFETCH) || + (request->m_type == RubyRequestType_LD_Bypass) || (request->m_type == RubyRequestType_FLUSHALL)); hitCallback(request, data, true, mach, externalHit, @@ -536,6 +537,7 @@ request_address.getOffset(), pkt->getSize()); } else if (pkt->getPtr(true) != NULL) { if ((type == RubyRequestType_LD) || + (type 
== RubyRequestType_LD_Bypass) || (type == RubyRequestType_IFETCH) || (type == RubyRequestType_RMW_Read) || (type == RubyRequestType_Locked_RMW_Read) || @@ -646,14 +648,22 @@ primary_type = RubyRequestType_RMW_Read; secondary_type = RubyRequestType_ST; } else { - primary_type = secondary_type = RubyRequestType_LD; + if (pkt->req->isBypassL1()) { + primary_type = secondary_type = RubyRequestType_LD_Bypass; + } else { + primary_type = secondary_type = RubyRequestType_LD; + } } } } else if (pkt->isWrite()) { // // Note: M5 packets do not differentiate ST from RMW_Write // - primary_type = secondary_type = RubyRequestType_ST; + if (pkt->req->isBypassL1()) { + primary_type = secondary_type = RubyRequestType_ST_Bypass; + } else { + primary_type = secondary_type = RubyRequestType_ST; + } } else if (pkt->isFlush()) { if (pkt->cmd == MemCmd::FlushAllReq) { primary_type = secondary_type = RubyRequestType_FLUSHALL; # HG changeset patch # User Lena Olson # Date 1449098820 21600 # Node ID a2d8a04da505d79092627e164893413f99327750 # Parent 10ea2c2a71b0890783a59a3b70b8e9365cd76c14 Adds architectural changes required to handle GPU page faults. This patch makes changes: 1) Adds a GPU page fault register to the CPU core. This is not architecturally visible. This register holds the state of the GPU page fault. Either 0 => Not handling a GPU page fault, or 1 => currently handling a GPU page fault. This register is set by the GPU device MMU before raising a page fault interrupt. 2) Modifies the iret instruction's microcode. Now, when returning from an interrupt, check the GPU page fault register. If the register is 1, then notify the GPU MMU that it's possible a GPU page fault has completed. 3) Adds a gpufaultfinish psuedo-instruction and the microcode implementation. This instruction calls a function on the GPU, gpuFinishPageFault. This function is implemented in gem5-gpu. diff -r 10ea2c2a71b0 -r a2d8a04da505 src/arch/x86/isa/insts/general_purpose/control_transfer/interrupts_and_exceptions.py --- a/src/arch/x86/isa/insts/general_purpose/control_transfer/interrupts_and_exceptions.py Wed Dec 02 17:26:59 2015 -0600 +++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/interrupts_and_exceptions.py Wed Dec 02 17:27:00 2015 -0600 @@ -62,7 +62,13 @@ # Read the handy m5 register for use later rdm5reg t4 + # check if this was a GPU fault and notify the GPU. + rdval t5, "InstRegIndex(MISCREG_GPU_FAULT)" + andi t0, t5, 1, flags=(EZF,) + br label("notGPUFaultFallThrough"), flags=(CEZF,) + gpufaultfinish +notGPUFaultFallThrough: ### ### Handle if we're returning to virtual 8086 mode. ### diff -r 10ea2c2a71b0 -r a2d8a04da505 src/arch/x86/isa/microops/gpu.isa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/arch/x86/isa/microops/gpu.isa Wed Dec 02 17:27:00 2015 -0600 @@ -0,0 +1,91 @@ +// Copyright (c) 2013 Mark D. Hill and David A. Wood +// All rights reserved. +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Jason Power + +output header {{ + void gpuFinishPageFault(int gpuId, ThreadContext *tc); + class GPUFaultFinish : public X86ISA::X86MicroopBase + { + public: + GPUFaultFinish(ExtMachInst _machInst, const char * instMnem, + uint64_t setFlags) : + X86MicroopBase(_machInst, "gpufaultfinish", instMnem, + setFlags | (ULL(1) << StaticInst::IsNonSpeculative), + No_OpClass) + { + } + + %(BasicExecDeclare)s + + std::string generateDisassembly(Addr pc, + const SymbolTable *symtab) const; + }; +}}; + +output exec {{ + Fault + GPUFaultFinish::execute(CPU_EXEC_CONTEXT *xc, + Trace::InstRecord * traceData) const + { + gpuFinishPageFault(0, xc->tcBase()); + return NoFault; + } +}}; + +output decoder {{ + std::string GPUFaultFinish::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { + std::stringstream response; + + printMnemonic(response, instMnem, mnemonic); + + return response.str(); + } +}}; + +let {{ + class GPUFaultFinish(X86Microop): + className = "GPUFaultFinish" + def __init__(self): + pass + + def getAllocator(self, microFlags): + return "new GPUFaultFinish(machInst, macrocodeBlock, %s)" % \ + self.microFlagsText(microFlags) + + microopClasses["gpufaultfinish"] = GPUFaultFinish +}}; diff -r 10ea2c2a71b0 -r a2d8a04da505 src/arch/x86/isa/microops/microops.isa --- a/src/arch/x86/isa/microops/microops.isa Wed Dec 02 17:26:59 2015 -0600 +++ b/src/arch/x86/isa/microops/microops.isa Wed Dec 02 17:27:00 2015 -0600 @@ -61,3 +61,6 @@ //Microops for printing out debug messages through M5 ##include "debug.isa" + +//Microops for interacting with the GPU +##include "gpu.isa" diff -r 10ea2c2a71b0 -r a2d8a04da505 src/arch/x86/regs/misc.hh --- a/src/arch/x86/regs/misc.hh Wed Dec 02 17:26:59 2015 -0600 +++ b/src/arch/x86/regs/misc.hh Wed Dec 02 17:27:00 2015 -0600 @@ -396,6 +396,9 @@ // "Fake" MSRs for internally implemented devices MISCREG_PCI_CONFIG_ADDRESS, + // GPU fault register + MISCREG_GPU_FAULT, + NUM_MISCREGS }; @@ -937,6 +940,14 @@ Bitfield<11> enable; Bitfield<8> bsp; 
EndBitUnion(LocalApicBase) + + /** + * Register for active GPU page fault + * May need to increase to more bits if more than 1 GPU is in the system + */ + BitUnion64(GPUFaultReg) + Bitfield<0> inFault; + EndBitUnion(GPUFaultReg) } #endif // __ARCH_X86_INTREGS_HH__ # HG changeset patch # User Lena Olson # Date 1449098820 21600 # Node ID 6fcd842605cef42c25a7e02ef3acf8a50735eae4 # Parent a2d8a04da505d79092627e164893413f99327750 Adds an interrupt to x86 that corresponds to a GPU page fault. diff -r a2d8a04da505 -r 6fcd842605ce src/arch/x86/interrupts.cc --- a/src/arch/x86/interrupts.cc Wed Dec 02 17:27:00 2015 -0600 +++ b/src/arch/x86/interrupts.cc Wed Dec 02 17:27:00 2015 -0600 @@ -269,7 +269,9 @@ } else if (!DeliveryMode::isReserved(deliveryMode)) { DPRINTF(LocalApic, "Interrupt is an %s.\n", DeliveryMode::names[deliveryMode]); - if (deliveryMode == DeliveryMode::SMI && !pendingSmi) { + if (deliveryMode == DeliveryMode::GPUFault) { + pendingUnmaskableInt = pendingGpu = true; + } else if (deliveryMode == DeliveryMode::SMI && !pendingSmi) { pendingUnmaskableInt = pendingSmi = true; smiVector = vector; } else if (deliveryMode == DeliveryMode::NMI && !pendingNmi) { @@ -609,7 +611,7 @@ X86ISA::Interrupts::Interrupts(Params * p) : BasicPioDevice(p, PageBytes), IntDevice(this, p->int_latency), - apicTimerEvent(this), + apicTimerEvent(this), pendingGpu(false), pendingSmi(false), smiVector(0), pendingNmi(false), nmiVector(0), pendingExtInt(false), extIntVector(0), @@ -664,7 +666,13 @@ // These are all probably fairly uncommon, so we'll make them easier to // check for. if (pendingUnmaskableInt) { - if (pendingSmi) { + if (pendingGpu) { + DPRINTF(LocalApic, "Generated GPU page fault object.\n"); + Addr addr = tc->readMiscRegNoEffect(MISCREG_GPU_FAULTADDR); + uint32_t code = tc->readMiscRegNoEffect(MISCREG_GPU_FAULTCODE); + assert(((GPUFaultReg)tc->readMiscRegNoEffect(MISCREG_GPU_FAULT)).inFault == 1); + return new PageFault(addr, code); + } else if (pendingSmi) { DPRINTF(LocalApic, "Generated SMI fault object.\n"); return new SystemManagementInterrupt(); } else if (pendingNmi) { diff -r a2d8a04da505 -r 6fcd842605ce src/arch/x86/interrupts.hh --- a/src/arch/x86/interrupts.hh Wed Dec 02 17:27:00 2015 -0600 +++ b/src/arch/x86/interrupts.hh Wed Dec 02 17:27:00 2015 -0600 @@ -117,6 +117,7 @@ * A set of variables to keep track of interrupts that don't go through * the IRR. 
*/ + bool pendingGpu; bool pendingSmi; uint8_t smiVector; bool pendingNmi; @@ -231,6 +232,12 @@ return entry.periodic; } + void + triggerGPUInterrupt() + { + requestInterrupt(0, DeliveryMode::GPUFault, false); + } + AddrRangeList getIntAddrRange() const; BaseMasterPort &getMasterPort(const std::string &if_name, diff -r a2d8a04da505 -r 6fcd842605ce src/arch/x86/intmessage.hh --- a/src/arch/x86/intmessage.hh Wed Dec 02 17:27:00 2015 -0600 +++ b/src/arch/x86/intmessage.hh Wed Dec 02 17:27:00 2015 -0600 @@ -59,12 +59,13 @@ INIT = 5, SIPI = 6, ExtInt = 7, + GPUFault = 8, NumModes }; static const char * const names[NumModes] = { "Fixed", "LowestPriority", "SMI", "Reserved", - "NMI", "INIT", "Startup", "ExtInt" + "NMI", "INIT", "Startup", "ExtInt", "GPUFault" }; static inline bool diff -r a2d8a04da505 -r 6fcd842605ce src/arch/x86/regs/misc.hh --- a/src/arch/x86/regs/misc.hh Wed Dec 02 17:27:00 2015 -0600 +++ b/src/arch/x86/regs/misc.hh Wed Dec 02 17:27:00 2015 -0600 @@ -398,6 +398,8 @@ // GPU fault register MISCREG_GPU_FAULT, + MISCREG_GPU_FAULTADDR, + MISCREG_GPU_FAULTCODE, NUM_MISCREGS }; @@ -948,6 +950,14 @@ BitUnion64(GPUFaultReg) Bitfield<0> inFault; EndBitUnion(GPUFaultReg) + + BitUnion64(GPUFaultCode) + Bitfield<0> present; + Bitfield<1> write; + Bitfield<2> user; + Bitfield<3> reserved; + Bitfield<4> fetch; + EndBitUnion(GPUFaultCode) } #endif // __ARCH_X86_INTREGS_HH__ # HG changeset patch # User Joel Hestness # Date 1449098820 21600 # Node ID 066c06231f9f1bfcae610963c1e6f3126e52407f # Parent 6fcd842605cef42c25a7e02ef3acf8a50735eae4 ARM: Add GPU fault registers These are required to get gem5-gpu to compile and still have mostly ISA-agnostic code in the ShaderMMU. diff -r 6fcd842605ce -r 066c06231f9f src/arch/arm/miscregs.hh --- a/src/arch/arm/miscregs.hh Wed Dec 02 17:27:00 2015 -0600 +++ b/src/arch/arm/miscregs.hh Wed Dec 02 17:27:00 2015 -0600 @@ -672,7 +672,12 @@ MISCREG_A64_UNIMPL, // 603 MISCREG_UNKNOWN, // 604 - NUM_MISCREGS // 605 + // GPU fault register + MISCREG_GPU_FAULT, // 605 + MISCREG_GPU_FAULTADDR, // 606 + MISCREG_GPU_FAULTCODE, // 607 + + NUM_MISCREGS // 608 }; enum MiscRegInfo { @@ -1349,7 +1354,12 @@ "cp14_unimpl", "cp15_unimpl", "a64_unimpl", - "unknown" + "unknown", + + // GPU fault registers + "gpuf", + "gpufaddr", + "gpufcode" }; static_assert(sizeof(miscRegName) / sizeof(*miscRegName) == NUM_MISCREGS, @@ -1839,6 +1849,21 @@ Bitfield<9, 0> res1_9_0_el2; EndBitUnion(CPTR) + /** + * Register for active GPU page fault + * May need to increase to more bits if more than 1 GPU is in the system + */ + BitUnion64(GPUFaultReg) + Bitfield<0> inFault; + EndBitUnion(GPUFaultReg) + + BitUnion64(GPUFaultCode) + Bitfield<0> present; + Bitfield<1> write; + Bitfield<2> user; + Bitfield<3> reserved; + Bitfield<4> fetch; + EndBitUnion(GPUFaultCode) // Checks read access permissions to coproc. registers bool canReadCoprocReg(MiscRegIndex reg, SCR scr, CPSR cpsr, # HG changeset patch # User Joel Hestness # Date 1449098820 21600 # Node ID 79754ba05c2788ea3ec42dcee3c2d2c96ab215b1 # Parent 066c06231f9f1bfcae610963c1e6f3126e52407f ruby: Parameterize connect to IO bus Changes in gem5 (rev. 10116) now automatically connect Ruby sequencers to the IO bus, though not all sequencers are associated with IO-capable controllers (e.g. GPU, copy engine and page walk cache). Add a parameter to sequencers to disable connection to the IO bus. This param must be set to False by sequencers instantiated in gem5-gpu protocol config files. 
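For example, a hypothetical protocol-config fragment (the version and cache variables are placeholders) creating a GPU-side sequencer that stays off the piobus via the connect_to_io parameter added below:

    from m5.objects import RubySequencer

    gpu_seq = RubySequencer(version = seq_version,      # placeholder version number
                            icache = gpu_l1_cache,      # placeholder RubyCache objects
                            dcache = gpu_l1_cache,
                            ruby_system = ruby_system,
                            connect_to_io = False)      # Ruby.py then skips the piobus hookup
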
Note: This is a stop-gap fix until further decisions about GPU, copy engine IO capabilities are made. diff -r 066c06231f9f -r 79754ba05c27 configs/ruby/Ruby.py --- a/configs/ruby/Ruby.py Wed Dec 02 17:27:00 2015 -0600 +++ b/configs/ruby/Ruby.py Wed Dec 02 17:27:00 2015 -0600 @@ -199,11 +199,15 @@ # Connect the cpu sequencers and the piobus if piobus != None: for cpu_seq in cpu_sequencers: - cpu_seq.pio_master_port = piobus.slave - cpu_seq.mem_master_port = piobus.slave + # gem5-gpu: This is parameterized to not connect components that + # are unable to handle IO messages. This is a stop-gap fix until + # further decisions about GPU, copy engine IO capabilities are made + if cpu_seq.connect_to_io: + cpu_seq.pio_master_port = piobus.slave + cpu_seq.mem_master_port = piobus.slave - if buildEnv['TARGET_ISA'] == "x86": - cpu_seq.pio_slave_port = piobus.master + if buildEnv['TARGET_ISA'] == "x86": + cpu_seq.pio_slave_port = piobus.master ruby._cpu_ports = cpu_sequencers ruby.num_of_sequencers = len(cpu_sequencers) diff -r 066c06231f9f -r 79754ba05c27 src/mem/ruby/system/Sequencer.py --- a/src/mem/ruby/system/Sequencer.py Wed Dec 02 17:27:00 2015 -0600 +++ b/src/mem/ruby/system/Sequencer.py Wed Dec 02 17:27:00 2015 -0600 @@ -70,6 +70,10 @@ deadlock_threshold = Param.Cycles(500000, "max outstanding cycles for a request before deadlock/livelock declared") using_network_tester = Param.Bool(False, "") + # gem5-gpu: This is parameterized to not connect components that + # are unable to handle IO messages. This is a stop-gap fix until + # further decisions about GPU, copy engine IO capabilities are made + connect_to_io = Param.Bool(True, "Whether to connect to IO") class DMASequencer(RubyPort): type = 'DMASequencer' # HG changeset patch # User Joel Hestness # Date 1449098820 21600 # Node ID 7ff5f0cec9babca95ba23f9bfc1103bc80c467b0 # Parent 79754ba05c2788ea3ec42dcee3c2d2c96ab215b1 HACKY! Allow Ruby to Proceed on Func Access Fail When accessing memory functionally, if packets should access the canonical physical memory anyway (access_phys_mem), there is no need to fail when the cache state is incorrect, because the backing store will be accessed AND hold correct data. Put in a check to dodge this for now. NOTE! THIS PATCH SHOULD BE USED WITH CAUTION: This patch has been tested and shows seemingly correct execution for all benchmarks, but this is still considered a hack, because it side-steps appropriate handling of data in Ruby caches. This side-stepping may result in incorrect benchmark output and mask performance issues caused by incorrectly designed cache behavior. diff -r 79754ba05c27 -r 7ff5f0cec9ba src/mem/ruby/system/RubyPort.cc --- a/src/mem/ruby/system/RubyPort.cc Wed Dec 02 17:27:00 2015 -0600 +++ b/src/mem/ruby/system/RubyPort.cc Wed Dec 02 17:27:00 2015 -0600 @@ -308,7 +308,7 @@ // Unless the requester explicitly said otherwise, generate an error if // the functional request failed - if (!accessSucceeded && !pkt->suppressFuncError()) { + if (!accessSucceeded && !pkt->suppressFuncError() && !access_phys_mem) { fatal("Ruby functional %s failed for address %#x\n", pkt->isWrite() ?
"write" : "read", pkt->getAddr()); } # HG changeset patch # User Lena Olson # Date 1449098820 21600 # Node ID a56e9b3ca22f2a1573df46bc5ad56fbf728b966a # Parent 7ff5f0cec9babca95ba23f9bfc1103bc80c467b0 This patch is used to separate general patches for gem5-gpu and personal patches. It is needed so that when new patches are created at the end of the patch queue they are inserted well after any general gem5 patches. This greatly eases merging with changes to gem5-patches. To add a non-personal patch at the end of the patch queue, pop this patch and use qnew. (NOTE: This patch makes no changes to gem5) # HG changeset patch # User Lena Olson # Date 1449098820 21600 # Node ID bb8f525b8704bd6f6dc8d34eba8763110ddad44c # Parent a56e9b3ca22f2a1573df46bc5ad56fbf728b966a imported patch full-system-disk-pointer diff -r a56e9b3ca22f -r bb8f525b8704 configs/common/FSConfig.py --- a/configs/common/FSConfig.py Wed Dec 02 17:27:00 2015 -0600 +++ b/configs/common/FSConfig.py Wed Dec 02 17:27:00 2015 -0600 @@ -431,7 +431,8 @@ disk0 = CowIdeDisk(driveID='master') disk2 = CowIdeDisk(driveID='master') disk0.childImage(mdesc.disk()) - disk2.childImage(disk('linux-bigswap2.img')) + #disk2.childImage(disk('linux-bigswap2.img')) + disk2.childImage(disk('working.img')) self.pc.south_bridge.ide.disks = [disk0, disk2] # Add in a Bios information structure. # HG changeset patch # User Lena Olson # Date 1449098821 21600 # Node ID e833d880e04b389502e00199db2463e234c0dcd0 # Parent bb8f525b8704bd6f6dc8d34eba8763110ddad44c imported patch gdb-flags diff -r bb8f525b8704 -r e833d880e04b SConstruct --- a/SConstruct Wed Dec 02 17:27:00 2015 -0600 +++ b/SConstruct Wed Dec 02 17:27:01 2015 -0600 @@ -539,6 +539,7 @@ # As gcc and clang share many flags, do the common parts here main.Append(CCFLAGS=['-pipe']) main.Append(CCFLAGS=['-fno-strict-aliasing']) + main.Append(CCFLAGS=['-gdwarf-3']) # Enable -Wall and then disable the few warnings that we # consistently violate main.Append(CCFLAGS=['-Wall', '-Wno-sign-compare', '-Wundef']) # HG changeset patch # User Lena Olson # Date 1449098821 21600 # Node ID 2db91b90e03933b343bfab3341c103848f34346c # Parent e833d880e04b389502e00199db2463e234c0dcd0 Add a better error when there are duplicate virtual networks in an .sm file diff -r e833d880e04b -r 2db91b90e039 src/mem/slicc/symbols/StateMachine.py --- a/src/mem/slicc/symbols/StateMachine.py Wed Dec 02 17:27:01 2015 -0600 +++ b/src/mem/slicc/symbols/StateMachine.py Wed Dec 02 17:27:01 2015 -0600 @@ -567,7 +567,9 @@ vnet = var["virtual_network"] vnet_type = var["vnet_type"] - assert (vnet, network) not in vnet_dir_set + if (vnet, network) in vnet_dir_set: + #print vnet_dir_set + self.error("Duplicate entry for vnet(%s) and network(%s)" % (str(vnet), str(network))) vnet_dir_set.add((vnet,network)) code(''' # HG changeset patch # User Lena Olson # Date 1449098821 21600 # Node ID d63c91ba5966b4c10652fa9d6eaa2e5933d6d86d # Parent 2db91b90e03933b343bfab3341c103848f34346c Change error to warning if the C++ to python class mapping doesn't exist diff -r 2db91b90e039 -r d63c91ba5966 src/mem/slicc/symbols/StateMachine.py --- a/src/mem/slicc/symbols/StateMachine.py Wed Dec 02 17:27:01 2015 -0600 +++ b/src/mem/slicc/symbols/StateMachine.py Wed Dec 02 17:27:01 2015 -0600 @@ -220,9 +220,12 @@ code('${{param.ident}} = Param.${{python_type}}(${dflt_str}"")') else: - self.error("Unknown c++ to python class conversion for c++ " \ "type: '%s'.
Please update the python_class_map " \ - "in StateMachine.py", param.type_ast.type.c_ident) + self.warning("Unknown c++ to python class conversion for c++ " \ + "type: '%s'. Please update the python_class_map " \ + "in StateMachine.py\nAssuming same name for python "\ + "class", param.type_ast.type.c_ident) + python_type = param.type_ast.type.c_ident + code('${{param.ident}} = Param.${{python_type}}(${dflt_str}"")') code.dedent() code.write(path, '%s.py' % py_ident) # HG changeset patch # User Lena Olson # Date 1449098821 21600 # Node ID 755143994611e3d42599f4b152e76a76ac8090b7 # Parent d63c91ba5966b4c10652fa9d6eaa2e5933d6d86d imported patch lena diff -r d63c91ba5966 -r 755143994611 configs/common/Benchmarks.py --- a/configs/common/Benchmarks.py Wed Dec 02 17:27:01 2015 -0600 +++ b/configs/common/Benchmarks.py Wed Dec 02 17:27:01 2015 -0600 @@ -54,7 +54,7 @@ elif buildEnv['TARGET_ISA'] == 'alpha': return env.get('LINUX_IMAGE', disk('linux-latest.img')) elif buildEnv['TARGET_ISA'] == 'x86': - return env.get('LINUX_IMAGE', disk('x86root.img')) + return env.get('LINUX_IMAGE', disk('linux-x86.img')) elif buildEnv['TARGET_ISA'] == 'arm': return env.get('LINUX_IMAGE', disk('linux-arm-ael.img')) else: diff -r d63c91ba5966 -r 755143994611 configs/common/SysPaths.py --- a/configs/common/SysPaths.py Wed Dec 02 17:27:01 2015 -0600 +++ b/configs/common/SysPaths.py Wed Dec 02 17:27:01 2015 -0600 @@ -50,7 +50,7 @@ try: path = env['M5_PATH'].split(':') except KeyError: - path = [ '/dist/m5/system', '/n/poolfs/z/dist/m5/system' ] + path = [ '/p/multifacet/users/lena/gem5-gpu/' ] for system.dir in path: if os.path.isdir(system.dir): # HG changeset patch # User Lena Olson # Date 1449098822 21600 # Node ID 881ecce573ae277a5ef58ad475a36b928138d6d5 # Parent 755143994611e3d42599f4b152e76a76ac8090b7 Ruby: Add parameter to sequencer to control if it can restore from checkpoints diff -r 755143994611 -r 881ecce573ae src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:27:01 2015 -0600 +++ b/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:27:02 2015 -0600 @@ -70,6 +70,8 @@ assert(m_dataCache_ptr != NULL); m_usingNetworkTester = p->using_network_tester; + + m_canRestore = p->can_restore; } Sequencer::~Sequencer() diff -r 755143994611 -r 881ecce573ae src/mem/ruby/system/Sequencer.hh --- a/src/mem/ruby/system/Sequencer.hh Wed Dec 02 17:27:01 2015 -0600 +++ b/src/mem/ruby/system/Sequencer.hh Wed Dec 02 17:27:02 2015 -0600 @@ -149,6 +149,9 @@ Stats::Counter getIncompleteTimes(const MachineType t) const { return m_IncompleteTimes[t]; } + bool canRestore() const + { return m_canRestore; } + private: void issueRequest(PacketPtr pkt, RubyRequestType type); @@ -195,6 +198,8 @@ bool m_usingNetworkTester; + bool m_canRestore; + //! Histogram for number of outstanding requests per cycle. Stats::Histogram m_outstandReqHist; diff -r 755143994611 -r 881ecce573ae src/mem/ruby/system/Sequencer.py --- a/src/mem/ruby/system/Sequencer.py Wed Dec 02 17:27:01 2015 -0600 +++ b/src/mem/ruby/system/Sequencer.py Wed Dec 02 17:27:02 2015 -0600 @@ -74,6 +74,7 @@ # are unable to handle IO messages. 
This is a stop-gap fix until # further decisions about GPU, copy engine IO capabilities are made connect_to_io = Param.Bool(True, "Whether to connect to IO") + can_restore = Param.Bool(True, "true if this Sequencer can restore ckpt") class DMASequencer(RubyPort): type = 'DMASequencer' diff -r 755143994611 -r 881ecce573ae src/mem/ruby/system/System.cc --- a/src/mem/ruby/system/System.cc Wed Dec 02 17:27:01 2015 -0600 +++ b/src/mem/ruby/system/System.cc Wed Dec 02 17:27:02 2015 -0600 @@ -37,6 +37,7 @@ #include "debug/RubySystem.hh" #include "mem/ruby/common/Address.hh" #include "mem/ruby/network/Network.hh" +#include "mem/ruby/system/Sequencer.hh" #include "mem/ruby/system/System.hh" #include "sim/eventq.hh" #include "sim/simulate.hh" @@ -105,6 +106,7 @@ m_abs_cntrl_vec.push_back(cntrl); MachineID id = cntrl->getMachineID(); + g_abs_controls[id.getType()][id.getNum()] = cntrl; } @@ -320,8 +322,13 @@ vector<Sequencer*> sequencer_map; Sequencer* t = NULL; for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { - sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); - if (t == NULL) t = sequencer_map[cntrl]; + Sequencer *s = m_abs_cntrl_vec[cntrl]->getSequencer(); + if (s != NULL && s->canRestore()) { + sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); + if (t == NULL) t = sequencer_map[cntrl]; + } else { + sequencer_map.push_back(NULL); + } } assert(t != NULL); # HG changeset patch # User Lena Olson # Date 1449098822 21600 # Node ID fbdb503894fb81808370b17788136f437b04e29c # Parent 881ecce573ae277a5ef58ad475a36b928138d6d5 imported patch sequencer_change diff -r 881ecce573ae -r fbdb503894fb src/mem/ruby/system/CacheRecorder.cc --- a/src/mem/ruby/system/CacheRecorder.cc Wed Dec 02 17:27:02 2015 -0600 +++ b/src/mem/ruby/system/CacheRecorder.cc Wed Dec 02 17:27:02 2015 -0600 @@ -130,7 +130,12 @@ Packet *pkt = new Packet(req, requestType); pkt->dataStatic(traceRecord->m_data + rec_bytes_read); - Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id]; + int id = traceRecord->m_cntrl_id; + if (id >= m_seq_map.size()) { + id = 0; + } + + Sequencer* m_sequencer_ptr = m_seq_map[id]; assert(m_sequencer_ptr != NULL); m_sequencer_ptr->makeRequest(pkt); } # HG changeset patch # User Lena Olson # Date 1449098822 21600 # Node ID e1974fafe9880d91a2659c00fe16f26ccaf57240 # Parent fbdb503894fb81808370b17788136f437b04e29c imported patch add_vaddr_to_cache diff -r fbdb503894fb -r e1974fafe988 src/mem/protocol/MOESI_hammer-msg.sm --- a/src/mem/protocol/MOESI_hammer-msg.sm Wed Dec 02 17:27:02 2015 -0600 +++ b/src/mem/protocol/MOESI_hammer-msg.sm Wed Dec 02 17:27:02 2015 -0600 @@ -94,6 +94,7 @@ NetDest Destination, desc="Multicast destination mask"; MessageSizeType MessageSize, desc="size category of the message"; bool DirectedProbe, default="false", desc="probe filter directed probe"; + Address VAddr, desc="Virtual address for this request"; Cycles InitialRequestTime, default="Cycles(0)", desc="time the initial requests was sent from the L1Cache"; diff -r fbdb503894fb -r e1974fafe988 src/mem/ruby/slicc_interface/RubyRequest.hh --- a/src/mem/ruby/slicc_interface/RubyRequest.hh Wed Dec 02 17:27:02 2015 -0600 +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh Wed Dec 02 17:27:02 2015 -0600 @@ -42,6 +42,7 @@ { public: Address m_PhysicalAddress; + Address m_VirtualAddress; Address m_LineAddress; RubyRequestType m_Type; Address m_ProgramCounter; @@ -52,12 +53,13 @@ PacketPtr pkt; unsigned m_contextId; - RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len, + RubyRequest(Tick
curTime, uint64_t _paddr, uint64_t _vaddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No, unsigned _proc_id = 100) : Message(curTime), m_PhysicalAddress(_paddr), + m_VirtualAddress(_vaddr), m_Type(_type), m_ProgramCounter(_pc), m_AccessMode(_access_mode), @@ -81,6 +83,7 @@ const RubyAccessMode& getAccessMode() const { return m_AccessMode; } const int& getSize() const { return m_Size; } const PrefetchBit& getPrefetch() const { return m_Prefetch; } + const Address& getVirtualAddress() const { return m_VirtualAddress; } void writeData(DataBlock& block) const diff -r fbdb503894fb -r e1974fafe988 src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:27:02 2015 -0600 +++ b/src/mem/ruby/system/Sequencer.cc Wed Dec 02 17:27:02 2015 -0600 @@ -702,7 +702,17 @@ pc = pkt->req->getPC(); } + Addr vaddr = 0; + // get a valid virtual address for the gpu caches + if (pkt->req->hasVaddr()){ + vaddr = pkt->req->getVaddr(); + } + //else { + // printf("Packet missing vaddr has pc %lx and paddr %lx\n", pc, pkt->getAddr()); + //} + RubyRequest *msg = new RubyRequest(clockEdge(), pkt->getAddr(), + vaddr, pkt->getPtr<uint8_t>(true), pkt->getSize(), pc, secondary_type, RubyAccessMode_Supervisor, pkt,