macromem.cc
1 /*
2  * Copyright (c) 2010-2014 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Copyright (c) 2007-2008 The Florida State University
15  * All rights reserved.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions are
19  * met: redistributions of source code must retain the above copyright
20  * notice, this list of conditions and the following disclaimer;
21  * redistributions in binary form must reproduce the above copyright
22  * notice, this list of conditions and the following disclaimer in the
23  * documentation and/or other materials provided with the distribution;
24  * neither the name of the copyright holders nor the names of its
25  * contributors may be used to endorse or promote products derived from
26  * this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39  *
40  * Authors: Stephen Hines
41  */
42 
43 #include "arch/arm/insts/macromem.hh"
44 
45 #include <sstream>
46 
47 #include "arch/arm/generated/decoder.hh"
48 #include "arch/arm/insts/neon64_mem.hh"
49 
50 using namespace std;
51 using namespace ArmISAInst;
52 
53 namespace ArmISA
54 {
55 
56 MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
57  OpClass __opClass, IntRegIndex rn,
58  bool index, bool up, bool user, bool writeback,
59  bool load, uint32_t reglist) :
60  PredMacroOp(mnem, machInst, __opClass)
61 {
62  uint32_t regs = reglist;
63  uint32_t ones = number_of_ones(reglist);
64  uint32_t mem_ops = ones;
65 
66  // Copy the base address register if we overwrite it, or if this instruction
67  // is basically a no-op (we have to do something)
68  bool copy_base = (bits(reglist, rn) && load) || !ones;
69  bool force_user = user & !bits(reglist, 15);
70  bool exception_ret = user & bits(reglist, 15);
71  bool pc_temp = load && writeback && bits(reglist, 15);
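  // When the PC is both in the register list and a writeback is
  // requested, the PC value is first loaded into a temporary (ureg1)
  // and only moved into the PC after the base-register writeback, so
  // the branch is the last microop of the macroop.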
72 
73  if (!ones) {
74  numMicroops = 1;
75  } else if (load) {
76  numMicroops = ((ones + 1) / 2)
77  + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
78  + (copy_base ? 1 : 0)
79  + (writeback? 1 : 0)
80  + (pc_temp ? 1 : 0);
81  } else {
82  numMicroops = ones + (writeback ? 1 : 0);
83  }
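  // Loads are issued as 64-bit paired microops where possible, hence
  // (ones + 1) / 2 memory uops, with an extra single load reserved when
  // the count is even but the PC must be loaded separately for an
  // exception return; stores take one 32-bit microop per register.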
84 
85  microOps = new StaticInstPtr[numMicroops];
86 
87  uint32_t addr = 0;
88 
89  if (!up)
90  addr = (ones << 2) - 4;
91 
92  if (!index)
93  addr += 4;
94 
95  StaticInstPtr *uop = microOps;
96 
97  // Add 0 to Rn and stick it in ureg0.
98  // This is equivalent to a move.
99  if (copy_base)
100  *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
101 
102  unsigned reg = 0;
103  while (mem_ops != 0) {
104  // Do load operations in pairs if possible
105  if (load && mem_ops >= 2 &&
106  !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) {
107  // 64-bit memory operation
108  // Find 2 set register bits (clear them after finding)
109  unsigned reg_idx1;
110  unsigned reg_idx2;
111 
112  // Find the first register
113  while (!bits(regs, reg)) reg++;
114  replaceBits(regs, reg, 0);
115  reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;
116 
117  // Find the second register
118  while (!bits(regs, reg)) reg++;
119  replaceBits(regs, reg, 0);
120  reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;
121 
122  // Load into temp reg if necessary
123  if (reg_idx2 == INTREG_PC && pc_temp)
124  reg_idx2 = INTREG_UREG1;
125 
126  // Actually load both registers from memory
127  *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
128  copy_base ? INTREG_UREG0 : rn, up, addr);
129 
130  if (!writeback && reg_idx2 == INTREG_PC) {
131  // No writeback if idx==pc, set appropriate flags
132  (*uop)->setFlag(StaticInst::IsControl);
133  (*uop)->setFlag(StaticInst::IsIndirectControl);
134 
135  if (!(condCode == COND_AL || condCode == COND_UC))
136  (*uop)->setFlag(StaticInst::IsCondControl);
137  else
138  (*uop)->setFlag(StaticInst::IsUncondControl);
139  }
140 
141  if (up) addr += 8;
142  else addr -= 8;
143  mem_ops -= 2;
144  } else {
145  // 32-bit memory operation
146  // Find register for operation
147  unsigned reg_idx;
148  while (!bits(regs, reg)) reg++;
149  replaceBits(regs, reg, 0);
150  reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;
151 
152  if (load) {
153  if (writeback && reg_idx == INTREG_PC) {
154  // If this instruction changes the PC and performs a
155  // writeback, ensure the pc load/branch is the last uop.
156  // Load into a temp reg here.
157  *uop = new MicroLdrUop(machInst, INTREG_UREG1,
158  copy_base ? INTREG_UREG0 : rn, up, addr);
159  } else if (reg_idx == INTREG_PC && exception_ret) {
160  // Special handling for exception return
161  *uop = new MicroLdrRetUop(machInst, reg_idx,
162  copy_base ? INTREG_UREG0 : rn, up, addr);
163  } else {
164  // standard single load uop
165  *uop = new MicroLdrUop(machInst, reg_idx,
166  copy_base ? INTREG_UREG0 : rn, up, addr);
167  }
168 
169  // Loading pc as last operation? Set appropriate flags.
170  if (!writeback && reg_idx == INTREG_PC) {
171  (*uop)->setFlag(StaticInst::IsControl);
172  (*uop)->setFlag(StaticInst::IsIndirectControl);
173 
174  if (!(condCode == COND_AL || condCode == COND_UC))
175  (*uop)->setFlag(StaticInst::IsCondControl);
176  else
177  (*uop)->setFlag(StaticInst::IsUncondControl);
178  }
179  } else {
180  *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
181  }
182 
183  if (up) addr += 4;
184  else addr -= 4;
185  --mem_ops;
186  }
187 
188  // Load/store micro-op generated, go to next uop
189  ++uop;
190  }
191 
192  if (writeback && ones) {
193  // Perform writeback uop operation
194  if (up)
195  *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
196  else
197  *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
198 
199  // Write PC after address writeback?
200  if (pc_temp) {
201  if (exception_ret) {
202  *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
203  } else {
204  *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
205  }
206  (*uop)->setFlag(StaticInst::IsControl);
207  (*uop)->setFlag(StaticInst::IsIndirectControl);
208 
209  if (!(condCode == COND_AL || condCode == COND_UC))
210  (*uop)->setFlag(StaticInst::IsCondControl);
211  else
212  (*uop)->setFlag(StaticInst::IsUncondControl);
213 
214  if (rn == INTREG_SP)
215  (*uop)->setFlag(StaticInst::IsReturn);
216 
217  ++uop;
218  }
219  }
220 
221  --uop;
222  (*uop)->setLastMicroop();
223  microOps[0]->setFirstMicroop();
224 
225  /* Take the control flags from the last microop for the macroop */
226  if ((*uop)->isControl())
227  setFlag(StaticInst::IsControl);
228  if ((*uop)->isCondCtrl())
229  setFlag(StaticInst::IsCondControl);
230  if ((*uop)->isUncondCtrl())
231  setFlag(StaticInst::IsUncondControl);
232  if ((*uop)->isIndirectCtrl())
233  setFlag(StaticInst::IsIndirectControl);
234  if ((*uop)->isReturn())
235  setFlag(StaticInst::IsReturn);
236 
237  for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
238  (*uop)->setDelayedCommit();
239  }
240 }
241 
242 PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
243  uint32_t size, bool fp, bool load, bool noAlloc,
244  bool signExt, bool exclusive, bool acrel,
245  int64_t imm, AddrMode mode,
246  IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
247  PredMacroOp(mnem, machInst, __opClass)
248 {
249  bool post = (mode == AddrMd_PostIndex);
250  bool writeback = (mode != AddrMd_Offset);
251 
252  if (load) {
253  // Use integer rounding to round up loads of size 4
254  numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
255  } else {
256  numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
257  }
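  // For example, with a 64-bit register pair (size == 8) a load takes
  // (8 + 4) / 8 = 1 paired-load microop while a store takes 8 / 4 = 2
  // single-store microops; non-post-indexed forms add the leading
  // address uop and writeback forms add a trailing add uop.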
258  microOps = new StaticInstPtr[numMicroops];
259 
260  StaticInstPtr *uop = microOps;
261 
262  rn = makeSP(rn);
263 
264  if (!post) {
265  *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
266  post ? 0 : imm);
267  }
268 
269  if (fp) {
270  if (size == 16) {
271  if (load) {
272  *uop++ = new MicroLdFp16Uop(machInst, rt,
273  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
274  *uop++ = new MicroLdFp16Uop(machInst, rt2,
275  post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
276  } else {
277  *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
278  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
279  *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
280  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
281  *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
282  post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
283  *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
284  post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
285  }
286  } else if (size == 8) {
287  if (load) {
288  *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
289  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
290  } else {
291  *uop++ = new MicroStrFpXImmUop(machInst, rt,
292  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
293  *uop++ = new MicroStrFpXImmUop(machInst, rt2,
294  post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
295  }
296  } else if (size == 4) {
297  if (load) {
298  *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
299  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
300  } else {
301  *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
302  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
303  }
304  }
305  } else {
306  if (size == 8) {
307  if (load) {
308  *uop++ = new MicroLdPairUop(machInst, rt, rt2,
309  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
310  } else {
311  *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0,
312  0, noAlloc, exclusive, acrel);
313  *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0,
314  size, noAlloc, exclusive, acrel);
315  }
316  } else if (size == 4) {
317  if (load) {
318  if (signExt) {
319  *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
320  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
321  } else {
322  *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
323  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
324  }
325  } else {
326  *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
327  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
328  }
329  }
330  }
331 
332  if (writeback) {
333  *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
334  post ? imm : 0);
335  }
336 
337  assert(uop == &microOps[numMicroops]);
338  (*--uop)->setLastMicroop();
339  microOps[0]->setFirstMicroop();
340 
341  for (StaticInstPtr *curUop = microOps;
342  !(*curUop)->isLastMicroop(); curUop++) {
343  (*curUop)->setDelayedCommit();
344  }
345 }
346 
347 BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
348  OpClass __opClass, bool load, IntRegIndex dest,
349  IntRegIndex base, int64_t imm) :
350  PredMacroOp(mnem, machInst, __opClass)
351 {
352  numMicroops = load ? 1 : 2;
353  microOps = new StaticInstPtr[numMicroops];
354 
355  StaticInstPtr *uop = microOps;
356 
357  if (load) {
358  *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
359  } else {
360  *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
361  (*uop)->setDelayedCommit();
362  *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
363  }
364  (*uop)->setLastMicroop();
365  microOps[0]->setFirstMicroop();
366 }
367 
368 BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
369  OpClass __opClass, bool load, IntRegIndex dest,
370  IntRegIndex base, int64_t imm) :
371  PredMacroOp(mnem, machInst, __opClass)
372 {
373  numMicroops = load ? 2 : 3;
374  microOps = new StaticInstPtr[numMicroops];
375 
376  StaticInstPtr *uop = microOps;
377 
378  if (load) {
379  *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
380  } else {
381  *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0);
382  *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
383  }
384  *uop = new MicroAddXiUop(machInst, base, base, imm);
385  (*uop)->setLastMicroop();
386  microOps[0]->setFirstMicroop();
387 
388  for (StaticInstPtr *curUop = microOps;
389  !(*curUop)->isLastMicroop(); curUop++) {
390  (*curUop)->setDelayedCommit();
391  }
392 }
393 
394 BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
395  OpClass __opClass, bool load, IntRegIndex dest,
396  IntRegIndex base, int64_t imm) :
397  PredMacroOp(mnem, machInst, __opClass)
398 {
399  numMicroops = load ? 2 : 3;
400  microOps = new StaticInstPtr[numMicroops];
401 
402  StaticInstPtr *uop = microOps;
403 
404  if (load) {
405  *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
406  } else {
407  *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
408  *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
409  }
410  *uop = new MicroAddXiUop(machInst, base, base, imm);
411  (*uop)->setLastMicroop();
412  microOps[0]->setFirstMicroop();
413 
414  for (StaticInstPtr *curUop = microOps;
415  !(*curUop)->isLastMicroop(); curUop++) {
416  (*curUop)->setDelayedCommit();
417  }
418 }
419 
420 BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
421  OpClass __opClass, bool load, IntRegIndex dest,
422  IntRegIndex base, IntRegIndex offset,
423  ArmExtendType type, int64_t imm) :
424  PredMacroOp(mnem, machInst, __opClass)
425 {
426  numMicroops = load ? 1 : 2;
427  microOps = new StaticInstPtr[numMicroops];
428 
429  StaticInstPtr *uop = microOps;
430 
431  if (load) {
432  *uop = new MicroLdFp16RegUop(machInst, dest, base,
433  offset, type, imm);
434  } else {
435  *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
436  offset, type, imm);
437  (*uop)->setDelayedCommit();
438  *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
439  offset, type, imm);
440  }
441 
442  (*uop)->setLastMicroop();
443  microOps[0]->setFirstMicroop();
444 }
445 
446 BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
447  OpClass __opClass, IntRegIndex dest,
448  int64_t imm) :
449  PredMacroOp(mnem, machInst, __opClass)
450 {
451  numMicroops = 1;
452  microOps = new StaticInstPtr[numMicroops];
453 
454  microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
455  microOps[0]->setLastMicroop();
456  microOps[0]->setFirstMicroop();
457 }
458 
459 VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
460  unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
461  unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
462  PredMacroOp(mnem, machInst, __opClass)
463 {
464  assert(regs > 0 && regs <= 4);
465  assert(regs % elems == 0);
466 
467  numMicroops = (regs > 2) ? 2 : 1;
468  bool wb = (rm != 15);
469  bool deinterleave = (elems > 1);
470 
471  if (wb) numMicroops++;
472  if (deinterleave) numMicroops += (regs / elems);
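  // A 16-byte load microop covers up to two registers, so more than two
  // registers need a second one; writeback adds a uop and interleaved
  // element forms add regs / elems deinterleave uops.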
473  microOps = new StaticInstPtr[numMicroops];
474 
475  RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
476 
477  uint32_t noAlign = TLB::MustBeOne;
478 
479  unsigned uopIdx = 0;
480  switch (regs) {
481  case 4:
482  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
483  size, machInst, rMid, rn, 0, align);
484  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
485  size, machInst, rMid + 4, rn, 16, noAlign);
486  break;
487  case 3:
488  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
489  size, machInst, rMid, rn, 0, align);
490  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
491  size, machInst, rMid + 4, rn, 16, noAlign);
492  break;
493  case 2:
494  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
495  size, machInst, rMid, rn, 0, align);
496  break;
497  case 1:
498  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
499  size, machInst, rMid, rn, 0, align);
500  break;
501  default:
502  // Unknown number of registers
503  microOps[uopIdx++] = new Unknown(machInst);
504  }
505  if (wb) {
506  if (rm != 15 && rm != 13) {
507  microOps[uopIdx++] =
508  new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
509  } else {
510  microOps[uopIdx++] =
511  new MicroAddiUop(machInst, rn, rn, regs * 8);
512  }
513  }
514  if (deinterleave) {
515  switch (elems) {
516  case 4:
517  assert(regs == 4);
518  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
519  size, machInst, vd * 2, rMid, inc * 2);
520  break;
521  case 3:
522  assert(regs == 3);
523  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
524  size, machInst, vd * 2, rMid, inc * 2);
525  break;
526  case 2:
527  assert(regs == 4 || regs == 2);
528  if (regs == 4) {
529  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
530  size, machInst, vd * 2, rMid, inc * 2);
531  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
532  size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
533  } else {
534  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
535  size, machInst, vd * 2, rMid, inc * 2);
536  }
537  break;
538  default:
539  // Bad number of elements to deinterleave
540  microOps[uopIdx++] = new Unknown(machInst);
541  }
542  }
543  assert(uopIdx == numMicroops);
544 
545  for (unsigned i = 0; i < numMicroops - 1; i++) {
546  MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
547  assert(uopPtr);
548  uopPtr->setDelayedCommit();
549  }
550  microOps[0]->setFirstMicroop();
551  microOps[numMicroops - 1]->setLastMicroop();
552 }
553 
554 VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
555  OpClass __opClass, bool all, unsigned elems,
556  RegIndex rn, RegIndex vd, unsigned regs,
557  unsigned inc, uint32_t size, uint32_t align,
558  RegIndex rm, unsigned lane) :
559  PredMacroOp(mnem, machInst, __opClass)
560 {
561  assert(regs > 0 && regs <= 4);
562  assert(regs % elems == 0);
563 
564  unsigned eBytes = (1 << size);
565  unsigned loadSize = eBytes * elems;
566  unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
567  sizeof(FloatRegBits);
568 
569  assert(loadRegs > 0 && loadRegs <= 4);
570 
571  numMicroops = 1;
572  bool wb = (rm != 15);
573 
574  if (wb) numMicroops++;
575  numMicroops += (regs / elems);
576  microOps = new StaticInstPtr[numMicroops];
577 
578  RegIndex ufp0 = NumFloatV7ArchRegs;
579 
580  unsigned uopIdx = 0;
581  switch (loadSize) {
582  case 1:
583  microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
584  machInst, ufp0, rn, 0, align);
585  break;
586  case 2:
587  if (eBytes == 2) {
588  microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
589  machInst, ufp0, rn, 0, align);
590  } else {
591  microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
592  machInst, ufp0, rn, 0, align);
593  }
594  break;
595  case 3:
596  microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
597  machInst, ufp0, rn, 0, align);
598  break;
599  case 4:
600  switch (eBytes) {
601  case 1:
602  microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
603  machInst, ufp0, rn, 0, align);
604  break;
605  case 2:
606  microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
607  machInst, ufp0, rn, 0, align);
608  break;
609  case 4:
610  microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
611  machInst, ufp0, rn, 0, align);
612  break;
613  }
614  break;
615  case 6:
616  microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
617  machInst, ufp0, rn, 0, align);
618  break;
619  case 8:
620  switch (eBytes) {
621  case 2:
622  microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
623  machInst, ufp0, rn, 0, align);
624  break;
625  case 4:
626  microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
627  machInst, ufp0, rn, 0, align);
628  break;
629  }
630  break;
631  case 12:
632  microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
633  machInst, ufp0, rn, 0, align);
634  break;
635  case 16:
636  microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
637  machInst, ufp0, rn, 0, align);
638  break;
639  default:
640  // Unrecognized load size
641  microOps[uopIdx++] = new Unknown(machInst);
642  }
643  if (wb) {
644  if (rm != 15 && rm != 13) {
645  microOps[uopIdx++] =
646  new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
647  } else {
648  microOps[uopIdx++] =
649  new MicroAddiUop(machInst, rn, rn, loadSize);
650  }
651  }
652  switch (elems) {
653  case 4:
654  assert(regs == 4);
655  switch (size) {
656  case 0:
657  if (all) {
658  microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
659  machInst, vd * 2, ufp0, inc * 2);
660  } else {
661  microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
662  machInst, vd * 2, ufp0, inc * 2, lane);
663  }
664  break;
665  case 1:
666  if (all) {
667  microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
668  machInst, vd * 2, ufp0, inc * 2);
669  } else {
670  microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
671  machInst, vd * 2, ufp0, inc * 2, lane);
672  }
673  break;
674  case 2:
675  if (all) {
676  microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
677  machInst, vd * 2, ufp0, inc * 2);
678  } else {
679  microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
680  machInst, vd * 2, ufp0, inc * 2, lane);
681  }
682  break;
683  default:
684  // Bad size
685  microOps[uopIdx++] = new Unknown(machInst);
686  break;
687  }
688  break;
689  case 3:
690  assert(regs == 3);
691  switch (size) {
692  case 0:
693  if (all) {
694  microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
695  machInst, vd * 2, ufp0, inc * 2);
696  } else {
697  microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
698  machInst, vd * 2, ufp0, inc * 2, lane);
699  }
700  break;
701  case 1:
702  if (all) {
703  microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
704  machInst, vd * 2, ufp0, inc * 2);
705  } else {
706  microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
707  machInst, vd * 2, ufp0, inc * 2, lane);
708  }
709  break;
710  case 2:
711  if (all) {
712  microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
713  machInst, vd * 2, ufp0, inc * 2);
714  } else {
715  microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
716  machInst, vd * 2, ufp0, inc * 2, lane);
717  }
718  break;
719  default:
720  // Bad size
721  microOps[uopIdx++] = new Unknown(machInst);
722  break;
723  }
724  break;
725  case 2:
726  assert(regs == 2);
727  assert(loadRegs <= 2);
728  switch (size) {
729  case 0:
730  if (all) {
731  microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
732  machInst, vd * 2, ufp0, inc * 2);
733  } else {
734  microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
735  machInst, vd * 2, ufp0, inc * 2, lane);
736  }
737  break;
738  case 1:
739  if (all) {
740  microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
741  machInst, vd * 2, ufp0, inc * 2);
742  } else {
743  microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
744  machInst, vd * 2, ufp0, inc * 2, lane);
745  }
746  break;
747  case 2:
748  if (all) {
749  microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
750  machInst, vd * 2, ufp0, inc * 2);
751  } else {
752  microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
753  machInst, vd * 2, ufp0, inc * 2, lane);
754  }
755  break;
756  default:
757  // Bad size
758  microOps[uopIdx++] = new Unknown(machInst);
759  break;
760  }
761  break;
762  case 1:
763  assert(regs == 1 || (all && regs == 2));
764  assert(loadRegs <= 2);
765  for (unsigned offset = 0; offset < regs; offset++) {
766  switch (size) {
767  case 0:
768  if (all) {
769  microOps[uopIdx++] =
770  new MicroUnpackAllNeon2to2Uop<uint8_t>(
771  machInst, (vd + offset) * 2, ufp0, inc * 2);
772  } else {
773  microOps[uopIdx++] =
774  new MicroUnpackNeon2to2Uop<uint8_t>(
775  machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
776  }
777  break;
778  case 1:
779  if (all) {
780  microOps[uopIdx++] =
781  new MicroUnpackAllNeon2to2Uop<uint16_t>(
782  machInst, (vd + offset) * 2, ufp0, inc * 2);
783  } else {
784  microOps[uopIdx++] =
785  new MicroUnpackNeon2to2Uop<uint16_t>(
786  machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
787  }
788  break;
789  case 2:
790  if (all) {
791  microOps[uopIdx++] =
792  new MicroUnpackAllNeon2to2Uop<uint32_t>(
793  machInst, (vd + offset) * 2, ufp0, inc * 2);
794  } else {
795  microOps[uopIdx++] =
796  new MicroUnpackNeon2to2Uop<uint32_t>(
797  machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
798  }
799  break;
800  default:
801  // Bad size
802  microOps[uopIdx++] = new Unknown(machInst);
803  break;
804  }
805  }
806  break;
807  default:
808  // Bad number of elements to unpack
809  microOps[uopIdx++] = new Unknown(machInst);
810  }
811  assert(uopIdx == numMicroops);
812 
813  for (unsigned i = 0; i < numMicroops - 1; i++) {
814  MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
815  assert(uopPtr);
816  uopPtr->setDelayedCommit();
817  }
818  microOps[0]->setFirstMicroop();
819  microOps[numMicroops - 1]->setLastMicroop();
820 }
821 
822 VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
823  unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
824  unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
825  PredMacroOp(mnem, machInst, __opClass)
826 {
827  assert(regs > 0 && regs <= 4);
828  assert(regs % elems == 0);
829 
830  numMicroops = (regs > 2) ? 2 : 1;
831  bool wb = (rm != 15);
832  bool interleave = (elems > 1);
833 
834  if (wb) numMicroops++;
835  if (interleave) numMicroops += (regs / elems);
836  microOps = new StaticInstPtr[numMicroops];
837 
838  uint32_t noAlign = TLB::MustBeOne;
839 
840  RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
841 
842  unsigned uopIdx = 0;
843  if (interleave) {
844  switch (elems) {
845  case 4:
846  assert(regs == 4);
847  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
848  size, machInst, rMid, vd * 2, inc * 2);
849  break;
850  case 3:
851  assert(regs == 3);
852  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
853  size, machInst, rMid, vd * 2, inc * 2);
854  break;
855  case 2:
856  assert(regs == 4 || regs == 2);
857  if (regs == 4) {
858  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
859  size, machInst, rMid, vd * 2, inc * 2);
860  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
861  size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
862  } else {
863  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
864  size, machInst, rMid, vd * 2, inc * 2);
865  }
866  break;
867  default:
868  // Bad number of elements to interleave
869  microOps[uopIdx++] = new Unknown(machInst);
870  }
871  }
872  switch (regs) {
873  case 4:
874  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
875  size, machInst, rMid, rn, 0, align);
876  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
877  size, machInst, rMid + 4, rn, 16, noAlign);
878  break;
879  case 3:
880  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
881  size, machInst, rMid, rn, 0, align);
882  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
883  size, machInst, rMid + 4, rn, 16, noAlign);
884  break;
885  case 2:
886  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
887  size, machInst, rMid, rn, 0, align);
888  break;
889  case 1:
890  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
891  size, machInst, rMid, rn, 0, align);
892  break;
893  default:
894  // Unknown number of registers
895  microOps[uopIdx++] = new Unknown(machInst);
896  }
897  if (wb) {
898  if (rm != 15 && rm != 13) {
899  microOps[uopIdx++] =
900  new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
901  } else {
902  microOps[uopIdx++] =
903  new MicroAddiUop(machInst, rn, rn, regs * 8);
904  }
905  }
906  assert(uopIdx == numMicroops);
907 
908  for (unsigned i = 0; i < numMicroops - 1; i++) {
909  MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
910  assert(uopPtr);
911  uopPtr->setDelayedCommit();
912  }
913  microOps[0]->setFirstMicroop();
914  microOps[numMicroops - 1]->setLastMicroop();
915 }
916 
917 VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
918  OpClass __opClass, bool all, unsigned elems,
919  RegIndex rn, RegIndex vd, unsigned regs,
920  unsigned inc, uint32_t size, uint32_t align,
921  RegIndex rm, unsigned lane) :
922  PredMacroOp(mnem, machInst, __opClass)
923 {
924  assert(!all);
925  assert(regs > 0 && regs <= 4);
926  assert(regs % elems == 0);
927 
928  unsigned eBytes = (1 << size);
929  unsigned storeSize = eBytes * elems;
930  unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
931  sizeof(FloatRegBits);
932 
933  assert(storeRegs > 0 && storeRegs <= 4);
934 
935  numMicroops = 1;
936  bool wb = (rm != 15);
937 
938  if (wb) numMicroops++;
939  numMicroops += (regs / elems);
940  microOps = new StaticInstPtr[numMicroops];
941 
942  RegIndex ufp0 = NumFloatV7ArchRegs;
943 
944  unsigned uopIdx = 0;
945  switch (elems) {
946  case 4:
947  assert(regs == 4);
948  switch (size) {
949  case 0:
950  microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
951  machInst, ufp0, vd * 2, inc * 2, lane);
952  break;
953  case 1:
954  microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
955  machInst, ufp0, vd * 2, inc * 2, lane);
956  break;
957  case 2:
958  microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
959  machInst, ufp0, vd * 2, inc * 2, lane);
960  break;
961  default:
962  // Bad size
963  microOps[uopIdx++] = new Unknown(machInst);
964  break;
965  }
966  break;
967  case 3:
968  assert(regs == 3);
969  switch (size) {
970  case 0:
971  microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
972  machInst, ufp0, vd * 2, inc * 2, lane);
973  break;
974  case 1:
975  microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
976  machInst, ufp0, vd * 2, inc * 2, lane);
977  break;
978  case 2:
979  microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
980  machInst, ufp0, vd * 2, inc * 2, lane);
981  break;
982  default:
983  // Bad size
984  microOps[uopIdx++] = new Unknown(machInst);
985  break;
986  }
987  break;
988  case 2:
989  assert(regs == 2);
990  assert(storeRegs <= 2);
991  switch (size) {
992  case 0:
993  microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
994  machInst, ufp0, vd * 2, inc * 2, lane);
995  break;
996  case 1:
997  microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
998  machInst, ufp0, vd * 2, inc * 2, lane);
999  break;
1000  case 2:
1001  microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
1002  machInst, ufp0, vd * 2, inc * 2, lane);
1003  break;
1004  default:
1005  // Bad size
1006  microOps[uopIdx++] = new Unknown(machInst);
1007  break;
1008  }
1009  break;
1010  case 1:
1011  assert(regs == 1 || (all && regs == 2));
1012  assert(storeRegs <= 2);
1013  for (unsigned offset = 0; offset < regs; offset++) {
1014  switch (size) {
1015  case 0:
1016  microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1017  machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1018  break;
1019  case 1:
1020  microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1021  machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1022  break;
1023  case 2:
1024  microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1025  machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1026  break;
1027  default:
1028  // Bad size
1029  microOps[uopIdx++] = new Unknown(machInst);
1030  break;
1031  }
1032  }
1033  break;
1034  default:
1035  // Bad number of elements to unpack
1036  microOps[uopIdx++] = new Unknown(machInst);
1037  }
1038  switch (storeSize) {
1039  case 1:
1040  microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1041  machInst, ufp0, rn, 0, align);
1042  break;
1043  case 2:
1044  if (eBytes == 2) {
1045  microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1046  machInst, ufp0, rn, 0, align);
1047  } else {
1048  microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1049  machInst, ufp0, rn, 0, align);
1050  }
1051  break;
1052  case 3:
1053  microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1054  machInst, ufp0, rn, 0, align);
1055  break;
1056  case 4:
1057  switch (eBytes) {
1058  case 1:
1059  microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1060  machInst, ufp0, rn, 0, align);
1061  break;
1062  case 2:
1063  microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1064  machInst, ufp0, rn, 0, align);
1065  break;
1066  case 4:
1067  microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1068  machInst, ufp0, rn, 0, align);
1069  break;
1070  }
1071  break;
1072  case 6:
1073  microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1074  machInst, ufp0, rn, 0, align);
1075  break;
1076  case 8:
1077  switch (eBytes) {
1078  case 2:
1079  microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1080  machInst, ufp0, rn, 0, align);
1081  break;
1082  case 4:
1083  microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1084  machInst, ufp0, rn, 0, align);
1085  break;
1086  }
1087  break;
1088  case 12:
1089  microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1090  machInst, ufp0, rn, 0, align);
1091  break;
1092  case 16:
1093  microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1094  machInst, ufp0, rn, 0, align);
1095  break;
1096  default:
1097  // Bad store size
1098  microOps[uopIdx++] = new Unknown(machInst);
1099  }
1100  if (wb) {
1101  if (rm != 15 && rm != 13) {
1102  microOps[uopIdx++] =
1103  new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1104  } else {
1105  microOps[uopIdx++] =
1106  new MicroAddiUop(machInst, rn, rn, storeSize);
1107  }
1108  }
1109  assert(uopIdx == numMicroops);
1110 
1111  for (unsigned i = 0; i < numMicroops - 1; i++) {
1112  MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1113  assert(uopPtr);
1114  uopPtr->setDelayedCommit();
1115  }
1116  microOps[0]->setFirstMicroop();
1117  microOps[numMicroops - 1]->setLastMicroop();
1118 }
1119 
1120 VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1121  OpClass __opClass, RegIndex rn, RegIndex vd,
1122  RegIndex rm, uint8_t eSize, uint8_t dataSize,
1123  uint8_t numStructElems, uint8_t numRegs, bool wb) :
1124  PredMacroOp(mnem, machInst, __opClass)
1125 {
1126  RegIndex vx = NumFloatV8ArchRegs - 4;
1127  RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1128  bool baseIsSP = isSP((IntRegIndex) rnsp);
1129 
1130  numMicroops = wb ? 1 : 0;
1131 
1132  int totNumBytes = numRegs * dataSize / 8;
1133  assert(totNumBytes <= 64);
1134 
1135  // The guiding principle here is that no more than 16 bytes can be
1136  // transferred at a time
1137  int numMemMicroops = totNumBytes / 16;
1138  int residuum = totNumBytes % 16;
1139  if (residuum)
1140  ++numMemMicroops;
1141  numMicroops += numMemMicroops;
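  // e.g. four 128-bit registers (numRegs = 4, dataSize = 128) move 64
  // bytes, giving four 16-byte load microops and no residuum; three
  // 64-bit registers move 24 bytes, giving one 16-byte microop plus an
  // 8-byte one.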
1142 
1143  int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1144  numMicroops += numMarshalMicroops;
1145 
1146  microOps = new StaticInstPtr[numMicroops];
1147  unsigned uopIdx = 0;
1148  uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1149  TLB::AllowUnaligned;
1150 
1151  int i = 0;
1152  for (; i < numMemMicroops - 1; ++i) {
1153  microOps[uopIdx++] = new MicroNeonLoad64(
1154  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1155  baseIsSP, 16 /* accSize */, eSize);
1156  }
1157  microOps[uopIdx++] = new MicroNeonLoad64(
1158  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1159  residuum ? residuum : 16 /* accSize */, eSize);
1160 
1161  // Writeback microop: the post-increment amount is encoded in "Rm": a
1162  // 64-bit general register OR as '11111' for an immediate value equal to
1163  // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1164  if (wb) {
1165  if (rm != ((RegIndex) INTREG_X31)) {
1166  microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1167  UXTX, 0);
1168  } else {
1169  microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1170  totNumBytes);
1171  }
1172  }
1173 
1174  for (int i = 0; i < numMarshalMicroops; ++i) {
1175  switch(numRegs) {
1176  case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1177  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1178  numStructElems, 1, i /* step */);
1179  break;
1180  case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1181  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1182  numStructElems, 2, i /* step */);
1183  break;
1184  case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1185  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1186  numStructElems, 3, i /* step */);
1187  break;
1188  case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1189  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1190  numStructElems, 4, i /* step */);
1191  break;
1192  default: panic("Invalid number of registers");
1193  }
1194 
1195  }
1196 
1197  assert(uopIdx == numMicroops);
1198 
1199  for (int i = 0; i < numMicroops - 1; ++i) {
1200  microOps[i]->setDelayedCommit();
1201  }
1202  microOps[numMicroops - 1]->setLastMicroop();
1203 }
1204 
1205 VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1206  OpClass __opClass, RegIndex rn, RegIndex vd,
1207  RegIndex rm, uint8_t eSize, uint8_t dataSize,
1208  uint8_t numStructElems, uint8_t numRegs, bool wb) :
1209  PredMacroOp(mnem, machInst, __opClass)
1210 {
1211  RegIndex vx = NumFloatV8ArchRegs - 4;
1212  RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1213  bool baseIsSP = isSP((IntRegIndex) rnsp);
1214 
1215  numMicroops = wb ? 1 : 0;
1216 
1217  int totNumBytes = numRegs * dataSize / 8;
1218  assert(totNumBytes <= 64);
1219 
1220  // The guiding principle here is that no more than 16 bytes can be
1221  // transferred at a time
1222  int numMemMicroops = totNumBytes / 16;
1223  int residuum = totNumBytes % 16;
1224  if (residuum)
1225  ++numMemMicroops;
1226  numMicroops += numMemMicroops;
1227 
1228  int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1229  numMicroops += numMarshalMicroops;
1230 
1231  microOps = new StaticInstPtr[numMicroops];
1232  unsigned uopIdx = 0;
1233 
1234  for (int i = 0; i < numMarshalMicroops; ++i) {
1235  switch (numRegs) {
1236  case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1237  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1238  numStructElems, 1, i /* step */);
1239  break;
1240  case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1241  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1242  numStructElems, 2, i /* step */);
1243  break;
1244  case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1245  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1246  numStructElems, 3, i /* step */);
1247  break;
1248  case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1249  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1250  numStructElems, 4, i /* step */);
1251  break;
1252  default: panic("Invalid number of registers");
1253  }
1254  }
1255 
1256  uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1257  TLB::AllowUnaligned;
1258 
1259  int i = 0;
1260  for (; i < numMemMicroops - 1; ++i) {
1261  microOps[uopIdx++] = new MicroNeonStore64(
1262  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1263  baseIsSP, 16 /* accSize */, eSize);
1264  }
1265  microOps[uopIdx++] = new MicroNeonStore64(
1266  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1267  residuum ? residuum : 16 /* accSize */, eSize);
1268 
1269  // Writeback microop: the post-increment amount is encoded in "Rm": a
1270  // 64-bit general register OR as '11111' for an immediate value equal to
1271  // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1272  if (wb) {
1273  if (rm != ((RegIndex) INTREG_X31)) {
1274  microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1275  UXTX, 0);
1276  } else {
1277  microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1278  totNumBytes);
1279  }
1280  }
1281 
1282  assert(uopIdx == numMicroops);
1283 
1284  for (int i = 0; i < numMicroops - 1; i++) {
1285  microOps[i]->setDelayedCommit();
1286  }
1287  microOps[numMicroops - 1]->setLastMicroop();
1288 }
1289 
1290 VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1291  OpClass __opClass, RegIndex rn, RegIndex vd,
1292  RegIndex rm, uint8_t eSize, uint8_t dataSize,
1293  uint8_t numStructElems, uint8_t index, bool wb,
1294  bool replicate) :
1295  PredMacroOp(mnem, machInst, __opClass),
1296  eSize(0), dataSize(0), numStructElems(0), index(0),
1297  wb(false), replicate(false)
1298 
1299 {
1300  RegIndex vx = NumFloatV8ArchRegs - 4;
1301  RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1302  bool baseIsSP = isSP((IntRegIndex) rnsp);
1303 
1304  numMicroops = wb ? 1 : 0;
1305 
1306  int eSizeBytes = 1 << eSize;
1307  int totNumBytes = numStructElems * eSizeBytes;
1308  assert(totNumBytes <= 64);
1309 
1310  // The guiding principle here is that no more than 16 bytes can be
1311  // transferred at a time
1312  int numMemMicroops = totNumBytes / 16;
1313  int residuum = totNumBytes % 16;
1314  if (residuum)
1315  ++numMemMicroops;
1316  numMicroops += numMemMicroops;
1317 
1318  int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1319  numMicroops += numMarshalMicroops;
1320 
1321  microOps = new StaticInstPtr[numMicroops];
1322  unsigned uopIdx = 0;
1323 
1324  uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1325  TLB::AllowUnaligned;
1326 
1327  int i = 0;
1328  for (; i < numMemMicroops - 1; ++i) {
1329  microOps[uopIdx++] = new MicroNeonLoad64(
1330  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1331  baseIsSP, 16 /* accSize */, eSize);
1332  }
1333  microOps[uopIdx++] = new MicroNeonLoad64(
1334  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1335  residuum ? residuum : 16 /* accSize */, eSize);
1336 
1337  // Writeback microop: the post-increment amount is encoded in "Rm": a
1338  // 64-bit general register OR as '11111' for an immediate value equal to
1339  // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1340  if (wb) {
1341  if (rm != ((RegIndex) INTREG_X31)) {
1342  microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1343  UXTX, 0);
1344  } else {
1345  microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1346  totNumBytes);
1347  }
1348  }
1349 
1350  for (int i = 0; i < numMarshalMicroops; ++i) {
1351  microOps[uopIdx++] = new MicroUnpackNeon64(
1352  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1353  numStructElems, index, i /* step */, replicate);
1354  }
1355 
1356  assert(uopIdx == numMicroops);
1357 
1358  for (int i = 0; i < numMicroops - 1; i++) {
1359  microOps[i]->setDelayedCommit();
1360  }
1361  microOps[numMicroops - 1]->setLastMicroop();
1362 }
1363 
1364 VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1365  OpClass __opClass, RegIndex rn, RegIndex vd,
1366  RegIndex rm, uint8_t eSize, uint8_t dataSize,
1367  uint8_t numStructElems, uint8_t index, bool wb,
1368  bool replicate) :
1369  PredMacroOp(mnem, machInst, __opClass),
1370  eSize(0), dataSize(0), numStructElems(0), index(0),
1371  wb(false), replicate(false)
1372 {
1373  RegIndex vx = NumFloatV8ArchRegs - 4;
1374  RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1375  bool baseIsSP = isSP((IntRegIndex) rnsp);
1376 
1377  numMicroops = wb ? 1 : 0;
1378 
1379  int eSizeBytes = 1 << eSize;
1380  int totNumBytes = numStructElems * eSizeBytes;
1381  assert(totNumBytes <= 64);
1382 
1383  // The guiding principle here is that no more than 16 bytes can be
1384  // transferred at a time
1385  int numMemMicroops = totNumBytes / 16;
1386  int residuum = totNumBytes % 16;
1387  if (residuum)
1388  ++numMemMicroops;
1389  numMicroops += numMemMicroops;
1390 
1391  int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1392  numMicroops += numMarshalMicroops;
1393 
1394  microOps = new StaticInstPtr[numMicroops];
1395  unsigned uopIdx = 0;
1396 
1397  for (int i = 0; i < numMarshalMicroops; ++i) {
1398  microOps[uopIdx++] = new MicroPackNeon64(
1399  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1400  numStructElems, index, i /* step */, replicate);
1401  }
1402 
1403  uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1404  TLB::AllowUnaligned;
1405 
1406  int i = 0;
1407  for (; i < numMemMicroops - 1; ++i) {
1408  microOps[uopIdx++] = new MicroNeonStore64(
1409  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1410  baseIsSP, 16 /* accsize */, eSize);
1411  }
1412  microOps[uopIdx++] = new MicroNeonStore64(
1413  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1414  residuum ? residuum : 16 /* accSize */, eSize);
1415 
1416  // Writeback microop: the post-increment amount is encoded in "Rm": a
1417  // 64-bit general register OR as '11111' for an immediate value equal to
1418  // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1419  if (wb) {
1420  if (rm != ((RegIndex) INTREG_X31)) {
1421  microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1422  UXTX, 0);
1423  } else {
1424  microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1425  totNumBytes);
1426  }
1427  }
1428 
1429  assert(uopIdx == numMicroops);
1430 
1431  for (int i = 0; i < numMicroops - 1; i++) {
1432  microOps[i]->setDelayedCommit();
1433  }
1434  microOps[numMicroops - 1]->setLastMicroop();
1435 }
1436 
1437 MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1438  OpClass __opClass, IntRegIndex rn,
1439  RegIndex vd, bool single, bool up,
1440  bool writeback, bool load, uint32_t offset) :
1441  PredMacroOp(mnem, machInst, __opClass)
1442 {
1443  int i = 0;
1444 
1445  // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1446  // to be functionally identical except that fldmx is deprecated. For now
1447  // we'll assume they're otherwise interchangeable.
1448  int count = (single ? offset : (offset / 2));
1449  if (count == 0 || count > NumFloatV7ArchRegs)
1450  warn_once("Bad offset field for VFP load/store multiple.\n");
1451  if (count == 0) {
1452  // Force there to be at least one microop so the macroop makes sense.
1453  writeback = true;
1454  }
1455  if (count > NumFloatV7ArchRegs)
1456  count = NumFloatV7ArchRegs;
1457 
1458  numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
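  // Single-precision registers take one microop each; double-precision
  // registers are split into a bottom/top pair of microops, plus an
  // optional writeback uop.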
1459  microOps = new StaticInstPtr[numMicroops];
1460 
1461  int64_t addr = 0;
1462 
1463  if (!up)
1464  addr = 4 * offset;
1465 
1466  bool tempUp = up;
1467  for (int j = 0; j < count; j++) {
1468  if (load) {
1469  if (single) {
1470  microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1471  tempUp, addr);
1472  } else {
1473  microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1474  tempUp, addr);
1475  microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1476  addr + (up ? 4 : -4));
1477  }
1478  } else {
1479  if (single) {
1480  microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1481  tempUp, addr);
1482  } else {
1483  microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1484  tempUp, addr);
1485  microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1486  addr + (up ? 4 : -4));
1487  }
1488  }
1489  if (!tempUp) {
1490  addr -= (single ? 4 : 8);
1491  // The microops don't handle negative displacement, so if we
1492  // hit zero, flip polarity and start adding.
1493  if (addr <= 0) {
1494  tempUp = true;
1495  addr = -addr;
1496  }
1497  } else {
1498  addr += (single ? 4 : 8);
1499  }
1500  }
1501 
1502  if (writeback) {
1503  if (up) {
1504  microOps[i++] =
1505  new MicroAddiUop(machInst, rn, rn, 4 * offset);
1506  } else {
1507  microOps[i++] =
1508  new MicroSubiUop(machInst, rn, rn, 4 * offset);
1509  }
1510  }
1511 
1512  assert(numMicroops == i);
1513  microOps[numMicroops - 1]->setLastMicroop();
1514 
1515  for (StaticInstPtr *curUop = microOps;
1516  !(*curUop)->isLastMicroop(); curUop++) {
1517  MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1518  assert(uopPtr);
1519  uopPtr->setDelayedCommit();
1520  }
1521 }
1522 
1523 std::string
1524 MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1525 {
1526  std::stringstream ss;
1527  printMnemonic(ss);
1528  printReg(ss, ura);
1529  ss << ", ";
1530  printReg(ss, urb);
1531  ss << ", ";
1532  ccprintf(ss, "#%d", imm);
1533  return ss.str();
1534 }
1535 
1536 std::string
1537 MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1538 {
1539  std::stringstream ss;
1540  printMnemonic(ss);
1541  printReg(ss, ura);
1542  ss << ", ";
1543  printReg(ss, urb);
1544  ss << ", ";
1545  ccprintf(ss, "#%d", imm);
1546  return ss.str();
1547 }
1548 
1549 std::string
1550 MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1551 {
1552  std::stringstream ss;
1553  printMnemonic(ss);
1554  ss << "[PC,CPSR]";
1555  return ss.str();
1556 }
1557 
1558 std::string
1559 MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1560 {
1561  std::stringstream ss;
1562  printMnemonic(ss);
1563  printReg(ss, ura);
1564  ccprintf(ss, ", ");
1565  printReg(ss, urb);
1566  printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1567  return ss.str();
1568 }
1569 
1570 std::string
1571 MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1572 {
1573  std::stringstream ss;
1574  printMnemonic(ss);
1575  printReg(ss, ura);
1576  ss << ", ";
1577  printReg(ss, urb);
1578  return ss.str();
1579 }
1580 
1581 std::string
1582 MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1583 {
1584  std::stringstream ss;
1585  printMnemonic(ss);
1586  printReg(ss, ura);
1587  ss << ", ";
1588  printReg(ss, urb);
1589  ss << ", ";
1590  printReg(ss, urc);
1591  return ss.str();
1592 }
1593 
1594 std::string
1595 MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1596 {
1597  std::stringstream ss;
1598  printMnemonic(ss);
1599  if (isFloating())
1600  printReg(ss, ura + FP_Reg_Base);
1601  else
1602  printReg(ss, ura);
1603  ss << ", [";
1604  printReg(ss, urb);
1605  ss << ", ";
1606  ccprintf(ss, "#%d", imm);
1607  ss << "]";
1608  return ss.str();
1609 }
1610 
1611 std::string
1612 MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1613 {
1614  std::stringstream ss;
1615  printMnemonic(ss);
1616  printReg(ss, dest);
1617  ss << ",";
1618  printReg(ss, dest2);
1619  ss << ", [";
1620  printReg(ss, urb);
1621  ss << ", ";
1622  ccprintf(ss, "#%d", imm);
1623  ss << "]";
1624  return ss.str();
1625 }
1626 
1627 }