45 #ifndef __CPU_O3_LSQ_UNIT_HH__
46 #define __CPU_O3_LSQ_UNIT_HH__
54 #include "arch/isa_traits.hh"
55 #include "arch/locked_mem.hh"
56 #include "arch/mmapped_ipr.hh"
57 #include "config/the_isa.hh"
60 #include "debug/LSQUnit.hh"
64 struct DerivO3CPUParams;
81 typedef typename Impl::O3CPU
O3CPU;
83 typedef typename Impl::CPUPol::IEW
IEW;
84 typedef typename Impl::CPUPol::LSQ
LSQ;
92 void init(
O3CPU *cpu_ptr,
IEW *iew_ptr, DerivO3CPUParams *params,
93 LSQ *lsq_ptr,
unsigned maxLQEntries,
unsigned maxSQEntries,
97 std::string
name()
const;
251 inline void incrStIdx(
int &store_idx)
const;
253 inline void decrStIdx(
int &store_idx)
const;
255 inline void incrLdIdx(
int &load_idx)
const;
257 inline void decrLdIdx(
int &load_idx)
const;
518 uint8_t *
data,
int store_idx);
550 template <
class Impl>
559 assert(!load_inst->isExecuted());
566 (load_idx != loadHead || !load_inst->isAtCommit())) {
567 iewStage->rescheduleMemInst(load_inst);
568 ++lsqRescheduledLoads;
570 load_inst->seqNum, load_inst->pcState());
580 return std::make_shared<GenericISA::M5PanicFault>(
581 "Strictly ordered load [sn:%llx] PC %s\n",
582 load_inst->seqNum, load_inst->pcState());
586 int store_idx = load_inst->sqIdx;
591 "storeHead: %i addr: %#x%s\n",
592 load_idx, store_idx, storeHead, req->
getPaddr(),
593 sreqLow ?
" split" :
"");
600 load_inst->recordResult(
false);
602 load_inst->recordResult(
true);
606 assert(!load_inst->memData);
607 load_inst->memData =
new uint8_t[64];
621 fst_data_pkt->dataStatic(load_inst->memData);
635 cpu->schedule(wb, cpu->clockEdge(delay));
639 while (store_idx != -1) {
641 if (store_idx == storeWBIdx) {
647 store_idx += SQEntries;
649 assert(storeQueue[store_idx].inst);
651 store_size = storeQueue[store_idx].size;
655 else if (storeQueue[store_idx].inst->strictlyOrdered())
658 assert(storeQueue[store_idx].inst->effAddrValid());
662 bool store_has_lower_limit =
663 req->
getVaddr() >= storeQueue[store_idx].inst->effAddr;
664 bool store_has_upper_limit =
666 (storeQueue[store_idx].inst->effAddr + store_size);
667 bool lower_load_has_store_part =
668 req->
getVaddr() < (storeQueue[store_idx].inst->effAddr +
670 bool upper_load_has_store_part =
672 storeQueue[store_idx].inst->effAddr;
676 if (store_has_lower_limit && store_has_upper_limit && !req->
isLLSC()) {
678 int shift_amt = req->
getVaddr() - storeQueue[store_idx].inst->effAddr;
681 if (!load_inst->memData) {
682 load_inst->memData =
new uint8_t[req->
getSize()];
684 if (storeQueue[store_idx].isAllZeros)
685 memset(load_inst->memData, 0, req->
getSize());
687 memcpy(load_inst->memData,
688 storeQueue[store_idx].data + shift_amt, req->
getSize());
691 "addr %#x\n", store_idx, req->
getVaddr());
694 data_pkt->dataStatic(load_inst->memData);
713 ((store_has_lower_limit && lower_load_has_store_part) ||
714 (store_has_upper_limit && upper_load_has_store_part) ||
715 (lower_load_has_store_part && upper_load_has_store_part))) ||
717 ((store_has_lower_limit || upper_load_has_store_part) &&
718 (store_has_upper_limit || lower_load_has_store_part)))) {
726 if (storeQueue[store_idx].completed) {
727 panic(
"Should not check one of these");
736 loadQueue[stallingLoadIdx]->seqNum)) {
738 stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
739 stallingLoadIdx = load_idx;
744 iewStage->rescheduleMemInst(load_inst);
745 load_inst->clearIssued();
746 ++lsqRescheduledLoads;
751 "Store idx %i to load addr %#x\n",
768 DPRINTF(
LSQUnit,
"Doing memory access for inst [sn:%lli] PC %s\n",
769 load_inst->seqNum, load_inst->pcState());
772 if (!load_inst->memData) {
773 load_inst->memData =
new uint8_t[req->
getSize()];
777 bool completedFirst =
false;
786 state->
idx = load_idx;
787 state->
inst = load_inst;
792 fst_data_pkt = data_pkt;
814 bool successful_load =
true;
815 if (!dcachePort->sendTimingReq(fst_data_pkt)) {
816 successful_load =
false;
818 completedFirst =
true;
827 if (!dcachePort->sendTimingReq(snd_data_pkt)) {
832 successful_load =
false;
838 if (!successful_load) {
847 if (!completedFirst) {
871 iewStage->blockMemInst(load_inst);
880 template <
class Impl>
883 uint8_t *
data,
int store_idx)
885 assert(storeQueue[store_idx].inst);
888 " | storeHead:%i [sn:%i]\n",
889 store_idx, req->
getPaddr(), storeHead,
890 storeQueue[store_idx].inst->seqNum);
892 storeQueue[store_idx].req = req;
893 storeQueue[store_idx].sreqLow = sreqLow;
894 storeQueue[store_idx].sreqHigh = sreqHigh;
896 storeQueue[store_idx].size =
size;
898 assert(size <=
sizeof(storeQueue[store_idx].data) ||
904 storeQueue[store_idx].isSplit =
true;
908 memcpy(storeQueue[store_idx].data, data, size);
915 #endif // __CPU_O3_LSQ_UNIT_HH__
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
int getStoreHead()
Returns the index of the head store instruction.
unsigned LQEntries
The number of LQ entries, plus a sentinel entry (circular queue).
MasterPort * dcachePort
Pointer to the dcache port.
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
bool isEmpty() const
Returns if both the LQ and SQ are empty.
Impl::DynInstPtr DynInstPtr
Fault checkViolations(int load_idx, DynInstPtr &inst)
Check for ordering violations in the LSQ.
int usedStorePorts
The number of used cache ports in this cycle by stores.
int getLoadHead()
Returns the index of the head load instruction.
decltype(nullptr) constexpr NoFault
Cycles is a wrapper class for representing cycle counts, i.e.
Stats::Scalar invAddrSwpfs
Total number of software prefetches ignored due to invalid addresses.
bool hasStoresToWB()
Returns if there are any stores to writeback.
void resizeSQ(unsigned size)
Resizes the SQ to a given size.
uint8_t idx
The LQ/SQ index of the instruction.
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
LSQSenderState()
Default constructor.
int numStores()
Returns the number of stores in the SQ.
Stats::Scalar lsqForwLoads
Total number of loads forwarded from LSQ stores.
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
void drainSanityCheck() const
Perform sanity checks after a drain.
std::vector< SQEntry > storeQueue
The store queue.
bool violation()
Returns if there is a memory ordering violation.
bool sqEmpty() const
Returns if the SQ is empty.
bool isStrictlyOrdered() const
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
void decrStIdx(int &store_idx) const
Decrements the given store index (circular queue).
void completeStore(int store_idx)
Completes the store at the specified index.
int cacheStorePorts
The number of cache ports available each cycle (stores only).
Writeback event, specifically for when stores forward data to loads.
TimeBuffer< IssueStruct >::wire fromIssue
Wire to read information from the issue stage time queue.
const bool HasUnalignedMemAcc
char data[16]
The store data.
bool isMmappedIpr() const
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Stats::Scalar lsqRescheduledLoads
Number of loads that were rescheduled.
int storeTail
The index of the tail instruction in the SQ.
void resetState()
Reset the LSQ state.
Derived class to hold any sender state the LSQ needs.
void writebackPendingStore()
Writes back a store that couldn't be completed the previous cycle.
SQEntry()
Constructs an empty store queue entry.
This is a write that is targeted and zeroing an entire cache block.
void resizeLQ(unsigned size)
Resizes the LQ to a given size.
void regStats()
Registers statistics.
bool isFull()
Returns if either the LQ or SQ is full.
int numLoads()
Returns the number of loads in the LQ.
Cycles handleIprRead(ThreadContext *xc, Packet *pkt)
Helper function to handle IPRs when the target architecture doesn't need its own IPR handling...
Stats::Scalar invAddrLoads
Total number of loads ignored due to invalid addresses.
ThreadContext is the external interface to all thread state for anything outside of the CPU...
void insertLoad(DynInstPtr &load_inst)
Inserts a load instruction.
Stats::Scalar lsqIgnoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Stats::Scalar lsqSquashedLoads
Total number of squashed loads.
void recvRetry()
Handles doing the retry.
Fault read(Request *req, Request *sreqLow, Request *sreqHigh, int load_idx)
Executes the load at the given index.
This is a simple scalar statistic, like a counter.
int loadTail
The index of the tail instruction in the LQ.
DynInstPtr inst
Instruction whose results are being written back.
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
void takeOverFrom()
Takes over from another CPU's thread.
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
std::string name() const
Returns the name of the LSQ unit.
void decrLdIdx(int &load_idx) const
Decrements the given load index (circular queue).
ThreadID lsqID
The LSQUnit thread id.
bool cacheBlocked
Whether or not the second packet of this split load was blocked.
void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id)
Initializes the LSQ unit with the specified number of entries.
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
bool hasPendingPkt
Whether or not there is a packet that couldn't be sent because of a lack of cache ports...
bool storeInFlight
Whether or not a store is in flight.
void incrLdIdx(int &load_idx) const
Increments the given load index (circular queue).
Tick curTick()
The current simulated tick.
bool willWB()
Returns if the LSQ unit will writeback on this cycle.
SQEntry(DynInstPtr &_inst)
Constructs a store queue entry for a given instruction.
bool noWB
Whether or not the instruction will need to writeback.
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
IEW * iewStage
Pointer to the IEW stage.
bool pktToSend
Whether or not there is a packet that needs sending.
bool lqFull()
Returns if the LQ is full.
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
bool isAllZeros
Does this request write all zeros and thus doesn't have any data attached to it.
void storePostSend(PacketPtr pkt)
Handles completing the send of a store to memory.
bool completed
Whether or not the store is completed.
bool stalled
Whether or not the LSQ is stalled.
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
bool isLoad
Whether or not it is a load.
bool complete()
Completes a packet and returns whether the access is finished.
void commitLoad()
Commits the head load.
void insert(DynInstPtr &inst)
Inserts an instruction.
int stallingLoadIdx
The index of the load that is stalled on the stalling store (see stallingStoreIsn).
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
int loadHead
The index of the head instruction in the LQ.
LSQUnit()
Constructs an LSQ unit.
Fault executeLoad(int lq_idx)
void setDcachePort(MasterPort *dcache_port)
Sets the pointer to the dcache port.
int storeHead
The index of the head instruction in the SQ.
PacketPtr pendingPkt
The packet that is pending free cache ports.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
RequestPtr sreqLow
The split requests for the store.
LSQUnit< Impl > * lsqPtr
The pointer to the LSQ unit that issued the store.
bool isSplit
Whether or not this access is split in two.
Fault write(Request *req, Request *sreqLow, Request *sreqHigh, uint8_t *data, int store_idx)
Executes the store at the given index.
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
bool isSplit
Whether or not the store is split into two requests.
PacketPtr retryPkt
The packet that needs to be retried.
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a MemObject that sees the packet.
Fault executeLoad(DynInstPtr &inst)
Executes a load instruction.
Stats::Scalar lsqSquashedStores
Total number of squashed stores.
bool sqFull()
Returns if the SQ is full.
void clearSQ()
Clears all the entries in the SQ.
Fault executeStore(DynInstPtr &inst)
Executes a store instruction.
Flags getFlags()
Accessor for flags.
LSQ * lsq
Pointer to the LSQ.
int16_t ThreadID
Thread index/ID type.
int stores
The number of store instructions in the SQ.
DynInstPtr inst
The store instruction.
void incrStIdx(int &store_idx) const
Increments the given store index (circular queue).
bool checkLoads
Should loads be checked for dependency issues.
Declaration of the Packet class.
std::vector< DynInstPtr > loadQueue
The load queue.
PacketPtr pkt
The packet that would have been sent to memory.
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
unsigned getCount()
Returns the number of instructions in the LSQ.
SenderState * senderState
This packet's sender state.
void process()
Processes the writeback event.
int storeWBIdx
The index of the first instruction that may be ready to be written back, and has not yet been written...
int storesToWB
The number of store instructions in the SQ waiting to writeback.
RequestPtr req
The request for the store.
void writebackStores()
Writes back stores.
bool sendStore(PacketPtr data_pkt)
Attempts to send a store to the cache.
PacketPtr pendingPacket
A second packet from a split store that needs sending.
bool needsTSO
Flag for memory model.
int loads
The number of load instructions in the LQ.
static PacketPtr createRead(const RequestPtr req)
Constructor-like methods that return Packets based on Request objects.
O3CPU * cpu
Pointer to the CPU.
void insertStore(DynInstPtr &store_inst)
Inserts a store instruction.
Stats::Scalar lsqBlockedLoads
Ready loads blocked due to partial store-forwarding.
PacketPtr mainPkt
The main packet from a split load, used during writeback.
const char * description() const
Returns the description of this event.
bool canWB
Whether or not the store can writeback.
Stats::Scalar lsqCacheBlocked
Number of times the LSQ is blocked due to the cache.
unsigned SQEntries
The number of SQ entries, plus a sentinel entry (circular queue).
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations...
void clearLQ()
Clears all the entries in the LQ.
uint8_t outstanding
Number of outstanding packets to complete.
Class that implements the actual LQ and SQ for each specific thread.
Impl::CPUPol::IssueStruct IssueStruct
void writeback(DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
bool committed
Whether or not the store is committed.
DynInstPtr inst
Instruction who initiated the access to memory.
std::shared_ptr< FaultBase > Fault
uint8_t size
The size of the store.
bool lqEmpty() const
Returns if the LQ is empty.
if(it_gpu==gpuTypeMap.end())
void handleLockedRead(XC *xc, Request *req)
Stats::Scalar lsqMemOrderViolation
Total number of memory ordering violations.
int numStoresToWB()
Returns the number of stores to writeback.
ProbePointArg< PacketInfo > Packet
Packet probe point.
void tick()
Ticks the LSQ unit, which in this case only resets the number of used cache ports.
Addr cacheBlockMask
Address Mask for a cache block (e.g.