44 template<
typename DestDataType,
typename AddrRegOperandType>
50 this->dest.disassemble(),
51 this->
addr.disassemble());
54 template<
typename DestDataType,
typename AddrRegOperandType>
60 typedef typename DestDataType::CType CType
M5_VAR_USED;
64 this->
addr.calcVector(w, addr_vec);
68 this->dest.set(w, lane, addr_vec[lane]);
74 template<
typename MemDataType,
typename DestDataType,
75 typename AddrRegOperandType>
79 switch (num_dest_operands) {
84 this->dest.disassemble(),
85 this->
addr.disassemble());
91 this->dest_vect[0].disassemble(),
92 this->dest_vect[1].disassemble(),
93 this->
addr.disassemble());
96 this->disassembly =
csprintf(
"%s_%s_%s (%s,%s,%s), %s", this->
opcode,
99 this->dest_vect[0].disassemble(),
100 this->dest_vect[1].disassemble(),
101 this->dest_vect[2].disassemble(),
102 this->
addr.disassemble());
105 this->disassembly =
csprintf(
"%s_%s_%s (%s,%s,%s,%s), %s",
109 this->dest_vect[0].disassemble(),
110 this->dest_vect[1].disassemble(),
111 this->dest_vect[2].disassemble(),
112 this->dest_vect[3].disassemble(),
113 this->
addr.disassemble());
116 fatal(
"Bad ld register dest operand, num vector operands: %d \n",
149 Addr addr_div8 = addr / 8;
150 Addr addr_mod8 = addr % 8;
155 assert(ret < w->privBase +
161 template<
typename MemDataType,
typename DestDataType,
162 typename AddrRegOperandType>
164 LdInst<MemDataType, DestDataType,
169 typedef typename MemDataType::CType MemCType;
180 assert(num_dest_operands == 1);
184 uint64_t address = this->
addr.calcUniform();
188 DPRINTF(HSAIL,
"ld_kernarg [%d] -> %d\n", address, val);
192 this->dest.set(w, lane, val);
198 uint64_t address = this->
addr.calcUniform();
203 DPRINTF(HSAIL,
"ld_arg [%d] -> %llu\n", address,
204 (
unsigned long long)val);
206 this->dest.set(w, lane, val);
215 this->
addr.calcVector(w, m->addr);
217 m->m_type = MemDataType::memType;
218 m->v_type = DestDataType::vgprType;
221 m->statusBitVector = 0;
222 m->equiv = this->equivClass;
224 if (num_dest_operands == 1) {
225 m->dst_reg = this->dest.regIndex();
228 m->n_reg = num_dest_operands;
229 for (
int i = 0;
i < num_dest_operands; ++
i) {
230 m->dst_reg_vec[
i] = this->dest_vect[
i].regIndex();
241 switch (this->segment) {
256 assert(!((
sizeof(MemCType) - 1) & m->addr[lane]));
261 m->addr[lane] = privAddr;
272 assert(num_dest_operands == 1);
283 m->addr[lane] = m->addr[lane] * w->
spillWidth +
310 assert(m->addr[lane] +
sizeof(MemCType) <= w->
roSize);
311 m->addr[lane] += w->
roBase;
328 m->addr[lane] = m->addr[lane] +
329 lane *
sizeof(MemCType) + w->
privBase;
339 fatal(
"Load to unsupported segment %d %llxe\n", this->segment,
347 template<
typename OperationType,
typename SrcDataType,
348 typename AddrRegOperandType>
350 StInst<OperationType, SrcDataType,
355 typedef typename OperationType::CType CType;
364 uint64_t address = this->
addr.calcUniform();
368 CType
data = this->src.template get<CType>(
w, lane);
369 DPRINTF(HSAIL,
"st_arg [%d] <- %d\n", address, data);
370 w->writeCallArgMem<CType>(lane, address,
data);
381 this->
addr.calcVector(w, m->addr);
383 if (num_src_operands == 1) {
386 ((CType*)m->d_data)[lane] =
387 this->src.template get<CType>(
w, lane);
391 for (
int k= 0;
k < num_src_operands; ++
k) {
395 this->src_vect[
k].template get<CType>(
w, lane);
401 m->m_type = OperationType::memType;
402 m->v_type = OperationType::vgprType;
404 m->statusBitVector = 0;
405 m->equiv = this->equivClass;
407 if (num_src_operands == 1) {
410 m->n_reg = num_src_operands;
420 switch (this->segment) {
433 assert(!((
sizeof(CType)-1) & m->addr[lane]));
438 m->addr[lane] = privAddr;
449 assert(num_src_operands == 1);
457 m->addr[lane] = m->addr[lane] * w->
spillWidth +
483 m->addr[lane] = m->addr[lane] + lane *
495 fatal(
"Store to unsupported segment %d\n", this->segment);
502 template<
typename OperationType,
typename SrcDataType,
503 typename AddrRegOperandType>
505 StInst<OperationType, SrcDataType,
506 AddrRegOperandType>::generateDisassembly()
508 switch (num_src_operands) {
512 OperationType::label,
513 this->src.disassemble(),
514 this->
addr.disassemble());
517 this->disassembly =
csprintf(
"%s_%s_%s (%s,%s), %s", this->
opcode,
519 OperationType::label,
520 this->src_vect[0].disassemble(),
521 this->src_vect[1].disassemble(),
522 this->
addr.disassemble());
525 this->disassembly =
csprintf(
"%s_%s_%s (%s,%s,%s,%s), %s",
528 OperationType::label,
529 this->src_vect[0].disassemble(),
530 this->src_vect[1].disassemble(),
531 this->src_vect[2].disassemble(),
532 this->src_vect[3].disassemble(),
533 this->
addr.disassemble());
535 default:
fatal(
"Bad ld register src operand, num vector operands: "
536 "%d \n", num_src_operands);
541 template<
typename DataType,
typename AddrRegOperandType,
int NumSrcOperands,
544 AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
547 typedef typename DataType::CType CType;
553 this->
addr.calcVector(w, m->addr);
556 ((CType *)m->a_data)[lane] =
557 this->src[0].template get<CType>(
w, lane);
561 if (NumSrcOperands > 1) {
563 ((CType*)m->x_data)[lane] =
564 this->src[1].template get<CType>(
w, lane);
568 assert(NumSrcOperands <= 2);
570 m->m_type = DataType::memType;
571 m->v_type = DataType::vgprType;
574 m->statusBitVector = 0;
579 m->dst_reg = this->dest.regIndex();
589 switch (this->segment) {
612 fatal(
"Atomic op to unsupported segment %d\n",
622 template<
typename DataType,
typename AddrRegOperandType,
int NumSrcOperands,
625 AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
626 HasDst>::generateDisassembly()
633 DataType::label, this->dest.disassemble(),
634 this->
addr.disassemble());
640 DataType::label, this->
addr.disassemble());
643 for (
int i = 0;
i < NumSrcOperands; ++
i) {
644 this->disassembly +=
",";
645 this->disassembly += this->src[
i].disassemble();
Tick ticks(int numCycles) const
void generateDisassembly() override
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
void generateDisassembly()
GlobalMemPipeline globalMemoryPipe
std::shared_ptr< GPUDynInst > GPUDynInstPtr
std::string csprintf(const char *format, const Args &...args)
std::queue< GPUDynInstPtr > & getLMReqFIFO()
CType readCallArgMem(int lane, int addr)
ComputeUnit * computeUnit
uint32_t outstandingReqsWrLm
uint32_t outstandingReqsRdGm
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint32_t outstandingReqsRdLm
static const int NumArgumentRegs M5_VAR_USED
uint32_t outstandingReqsWrGm
std::vector< Addr > lastAddr
VectorMask execMask() const
void issueRequest(GPUDynInstPtr gpuDynInst)
Issues a request to the pipeline, i.e., enqueues it in the request buffer.
uint32_t spillSizePerItem
const char * atomicOpToString(BrigAtomicOperation brigOp)
Bitfield< 24, 21 > opcode
LocalMemPipeline localMemoryPipe
void execute(GPUDynInstPtr gpuDynInst)
const char * segmentNames[]
static Addr calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)