llvm.org GIT mirror llvm / 5aa8056
[AMDGPU] Allow register tuples to set asm names

This change reverts most of the previous register name generation. The real
problem is that RegisterTuples does not generate asm names. This patch adds an
optional operand to RegisterTuples, which lets us simplify register name access
and dramatically reduce the size of the static tables for the backend.

Differential Revision: https://reviews.llvm.org/D64967

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366598 91177308-0d34-0410-b5e6-96231b3b80d8

Stanislav Mekhanoshin
6 changed files with 119 additions and 143 deletions.
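As a quick illustration of the new interface (a hypothetical sketch, not taken from this commit): a target can now pass a third operand to RegisterTuples holding one asm name per generated register. The sub0/sub1 indices, the GPR32 class and the registers r0..r3 below are assumed to exist and are purely illustrative.

    // Hypothetical target fragment; assumes sub0/sub1 SubRegIndex defs and a
    // four-register class GPR32 = { r0, r1, r2, r3 }. Two 64-bit tuples are
    // synthesized, so exactly two asm names are supplied.
    def GPR64Tuples : RegisterTuples<[sub0, sub1],
                                     [(add (decimate GPR32, 2)),
                                      (add (decimate (shl GPR32, 1), 2))],
                                     ["r[0:1]", "r[2:3]"]>;

If the name list is omitted the behavior is unchanged; if it is supplied, it is expected to provide one name per synthesized tuple.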
 // RegisterTuples instances can be used in other set operations to form
 // register classes and so on. This is the only way of using the generated
 // registers.
-class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> {
+//
+// RegNames may be specified to supply asm names for the generated tuples.
+// If used must have the same size as the list of produced registers.
+class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs,
+                     list<string> RegNames = []> {
   // SubRegs - N lists of registers to be zipped up. Super-registers are
   // synthesized from the first element of each SubRegs list, the second
   // element and so on.
  ...
   // SubRegIndices - N SubRegIndex instances. This provides the names of the
   // sub-registers in the synthesized super-registers.
   list<SubRegIndex> SubRegIndices = Indices;
+
+  // List of asm names for the generated tuple registers.
+  list<string> RegAsmNames = RegNames;
 }
 }
 #endif

-  unsigned AltName = AMDGPU::NoRegAltName;
-
-  if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo) ||
-      MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo) ||
-      MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo))
-    AltName = AMDGPU::Reg64;
-  else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo) ||
-           MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo) ||
-           MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo))
-    AltName = AMDGPU::Reg128;
-  else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo) ||
-           MRI.getRegClass(AMDGPU::SReg_96RegClassID).contains(RegNo))
-    AltName = AMDGPU::Reg96;
-  else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo) ||
-           MRI.getRegClass(AMDGPU::SReg_160RegClassID).contains(RegNo))
-    AltName = AMDGPU::Reg160;
-  else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo) ||
-           MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo))
-    AltName = AMDGPU::Reg256;
-  else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo) ||
-           MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo) ||
-           MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo))
-    AltName = AMDGPU::Reg512;
-  else if (MRI.getRegClass(AMDGPU::VReg_1024RegClassID).contains(RegNo) ||
-           MRI.getRegClass(AMDGPU::SReg_1024RegClassID).contains(RegNo) ||
-           MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo))
-    AltName = AMDGPU::Reg1024;
-
-  O << getRegisterName(RegNo, AltName);
+  O << getRegisterName(RegNo);
 }

 void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
 #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H

-#include "AMDGPUMCTargetDesc.h"
 #include "llvm/MC/MCInstPrinter.h"

 namespace llvm {
  ...
   //Autogenerated by tblgen
   void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
                         raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo,
-                                     unsigned AltIdx = AMDGPU::NoRegAltName);
+  static const char *getRegisterName(unsigned RegNo);

   void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
                  const MCSubtargetInfo &STI) override;
 }

 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
-  const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
-  unsigned Size = getRegSizeInBits(*RC);
-  unsigned AltName = AMDGPU::NoRegAltName;
-
-  switch (Size) {
-  case 64: AltName = AMDGPU::Reg64; break;
-  case 96: AltName = AMDGPU::Reg96; break;
-  case 128: AltName = AMDGPU::Reg128; break;
-  case 160: AltName = AMDGPU::Reg160; break;
-  case 256: AltName = AMDGPU::Reg256; break;
-  case 512: AltName = AMDGPU::Reg512; break;
-  case 1024: AltName = AMDGPU::Reg1024; break;
-  }
-  return AMDGPUInstPrinter::getRegisterName(Reg, AltName);
+  return AMDGPUInstPrinter::getRegisterName(Reg);
 }

 // FIXME: This is very slow. It might be worth creating a map from physreg to
                          !if(!eq(size, 16), ret16, ret32))))));
 }

-let Namespace = "AMDGPU" in {
-defset list<RegAltNameIndex> AllRegAltNameIndices = {
-  def Reg64 : RegAltNameIndex;
-  def Reg96 : RegAltNameIndex;
-  def Reg128 : RegAltNameIndex;
-  def Reg160 : RegAltNameIndex;
-  def Reg256 : RegAltNameIndex;
-  def Reg512 : RegAltNameIndex;
-  def Reg1024 : RegAltNameIndex;
-}
+// Generates list of sequential register tuple names.
+// E.g. RegSeq<3,2,2,"s">.ret -> [ "s[0:1]", "s[2:3]" ]
+class RegSeq<int last_reg, int stride, int size, string prefix, int start = 0> {
+  int next = !add(start, stride);
+  int end_reg = !add(!add(start, size), -1);
+  list<string> ret =
+    !if(!le(end_reg, last_reg),
+        !listconcat([prefix # "[" # start # ":" # end_reg # "]"],
+                    RegSeq<last_reg, stride, size, prefix, next>.ret),
+                    []);
 }
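To make the recursion concrete, here is how the helper unrolls for the SGPR 64-bit tuples defined further down in this file: start walks 0, 2, 4, ... in steps of stride, each name covers end_reg = start + size - 1, and the recursion stops as soon as end_reg would exceed last_reg (105).

    RegSeq<105, 2, 2, "s">.ret
      = [ "s[0:1]", "s[2:3]", "s[4:5]", ..., "s[104:105]" ]

That is 53 names, exactly one per generated SGPR pair.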

 //===----------------------------------------------------------------------===//
 // Declarations that describe the SI registers
 //===----------------------------------------------------------------------===//
-class SIReg <string n, bits<16> regIdx = 0, string prefix = "",
-             int regNo = !cast<int>(regIdx)> :
-  Register<n, !if(!eq(prefix, ""),
-                  [ n, n, n, n, n, n, n ],
-                  [ prefix # "[" # regNo # ":" # !and(!add(regNo, 1), 255) # "]",
-                    prefix # "[" # regNo # ":" # !and(!add(regNo, 2), 255) # "]",
-                    prefix # "[" # regNo # ":" # !and(!add(regNo, 3), 255) # "]",
-                    prefix # "[" # regNo # ":" # !and(!add(regNo, 4), 255) # "]",
-                    prefix # "[" # regNo # ":" # !and(!add(regNo, 7), 255) # "]",
-                    prefix # "[" # regNo # ":" # !and(!add(regNo, 15), 255) # "]",
-                    prefix # "[" # regNo # ":" # !and(!add(regNo, 31), 255) # "]",
-                  ])>,
+class SIReg <string n, bits<16> regIdx = 0> :
+  Register<n>,
   DwarfRegNum<[!cast<int>(HWEncoding)]> {
   let Namespace = "AMDGPU";
-  let RegAltNameIndices = AllRegAltNameIndices;

   // This is the not yet the complete register encoding. An additional
   // bit is set for VGPRs.
   let HWEncoding = regIdx;
-}
-
-class SIRegisterWithSubRegs<string n, list<Register> subregs> :
-  RegisterWithSubRegs<n, subregs> {
-  let RegAltNameIndices = AllRegAltNameIndices;
-  let AltNames = [ n, n, n, n, n, n, n ];
 }

 // Special Registers
  ...
 def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>;

 // VCC for 64-bit instructions
-def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
           DwarfRegAlias<VCC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
  ...
 def EXEC_LO : SIReg<"exec_lo", 126>;
 def EXEC_HI : SIReg<"exec_hi", 127>;

-def EXEC : SIRegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
+def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
            DwarfRegAlias<EXEC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
  ...
 def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
 def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;

-def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
+def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
                  DwarfRegAlias<XNACK_MASK_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
  ...
 def TBA_LO : SIReg<"tba_lo", 108>;
 def TBA_HI : SIReg<"tba_hi", 109>;

-def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
+def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
           DwarfRegAlias<TBA_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
  ...
 def TMA_LO : SIReg<"tma_lo", 110>;
 def TMA_HI : SIReg<"tma_hi", 111>;

-def TMA : SIRegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
+def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
           DwarfRegAlias<TMA_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
  ...
 }

 class FlatReg <Register lo, Register hi, bits<16> encoding> :
-  SIRegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+  RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
   DwarfRegAlias<lo> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
  ...

 // SGPR registers
 foreach Index = 0-105 in {
-  def SGPR#Index : SIReg <"s"#Index, Index, "s">;
+  def SGPR#Index : SIReg <"s"#Index, Index>;
 }

 // VGPR registers
 foreach Index = 0-255 in {
-  def VGPR#Index : SIReg <"v"#Index, Index, "v"> {
+  def VGPR#Index : SIReg <"v"#Index, Index> {
     let HWEncoding{8} = 1;
   }
 }

 // AccVGPR registers
 foreach Index = 0-255 in {
-  def AGPR#Index : SIReg <"a"#Index, Index, "a"> {
+  def AGPR#Index : SIReg <"a"#Index, Index> {
     let HWEncoding{8} = 1;
   }
 }
  ...
 // SGPR 64-bit registers
 def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret,
                                  [(add (decimate SGPR_32, 2)),
-                                  (add (decimate (shl SGPR_32, 1), 2))]>;
+                                  (add (decimate (shl SGPR_32, 1), 2))],
+                                 RegSeq<105, 2, 2, "s">.ret>;

 // SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs.
 def SGPR_96Regs : RegisterTuples<getSubRegs<3>.ret,
                                  [(add (decimate SGPR_32, 3)),
                                   (add (decimate (shl SGPR_32, 1), 3)),
-                                  (add (decimate (shl SGPR_32, 2), 3))]>;
+                                  (add (decimate (shl SGPR_32, 2), 3))],
+                                 RegSeq<105, 3, 3, "s">.ret>;

 // SGPR 128-bit registers
 def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
                                   [(add (decimate SGPR_32, 4)),
                                    (add (decimate (shl SGPR_32, 1), 4)),
                                    (add (decimate (shl SGPR_32, 2), 4)),
-                                   (add (decimate (shl SGPR_32, 3), 4))]>;
+                                   (add (decimate (shl SGPR_32, 3), 4))],
+                                  RegSeq<105, 4, 4, "s">.ret>;

 // SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs.
 def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret,
  ...
                                    (add (decimate (shl SGPR_32, 1), 4)),
                                    (add (decimate (shl SGPR_32, 2), 4)),
                                    (add (decimate (shl SGPR_32, 3), 4)),
-                                   (add (decimate (shl SGPR_32, 4), 4))]>;
+                                   (add (decimate (shl SGPR_32, 4), 4))],
+                                  RegSeq<105, 4, 5, "s">.ret>;

 // SGPR 256-bit registers
 def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
  ...
                                    (add (decimate (shl SGPR_32, 4), 4)),
                                    (add (decimate (shl SGPR_32, 5), 4)),
                                    (add (decimate (shl SGPR_32, 6), 4)),
-                                   (add (decimate (shl SGPR_32, 7), 4))]>;
+                                   (add (decimate (shl SGPR_32, 7), 4))],
+                                  RegSeq<105, 4, 8, "s">.ret>;

 // SGPR 512-bit registers
 def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
  ...
                                    (add (decimate (shl SGPR_32, 12), 4)),
                                    (add (decimate (shl SGPR_32, 13), 4)),
                                    (add (decimate (shl SGPR_32, 14), 4)),
-                                   (add (decimate (shl SGPR_32, 15), 4))]>;
+                                   (add (decimate (shl SGPR_32, 15), 4))],
+                                  RegSeq<105, 4, 16, "s">.ret>;

 // SGPR 1024-bit registers
 def SGPR_1024Regs : RegisterTuples<getSubRegs<32>.ret,
  ...
                                     (add (decimate (shl SGPR_32, 28), 4)),
                                     (add (decimate (shl SGPR_32, 29), 4)),
                                     (add (decimate (shl SGPR_32, 30), 4)),
-                                    (add (decimate (shl SGPR_32, 31), 4))]>;
+                                    (add (decimate (shl SGPR_32, 31), 4))],
+                                   RegSeq<105, 4, 32, "s">.ret>;

 // Trap handler TMP 32-bit registers
 def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
  ...
 // Trap handler TMP 64-bit registers
 def TTMP_64Regs : RegisterTuples<getSubRegs<2>.ret,
                                  [(add (decimate TTMP_32, 2)),
-                                  (add (decimate (shl TTMP_32, 1), 2))]>;
+                                  (add (decimate (shl TTMP_32, 1), 2))],
+                                 RegSeq<15, 2, 2, "ttmp">.ret>;

 // Trap handler TMP 128-bit registers
 def TTMP_128Regs : RegisterTuples<getSubRegs<4>.ret,
                                   [(add (decimate TTMP_32, 4)),
                                    (add (decimate (shl TTMP_32, 1), 4)),
                                    (add (decimate (shl TTMP_32, 2), 4)),
-                                   (add (decimate (shl TTMP_32, 3), 4))]>;
+                                   (add (decimate (shl TTMP_32, 3), 4))],
+                                  RegSeq<15, 4, 4, "ttmp">.ret>;

 def TTMP_256Regs : RegisterTuples<getSubRegs<8>.ret,
                                   [(add (decimate TTMP_32, 4)),
  ...
                                    (add (decimate (shl TTMP_32, 4), 4)),
                                    (add (decimate (shl TTMP_32, 5), 4)),
                                    (add (decimate (shl TTMP_32, 6), 4)),
-                                   (add (decimate (shl TTMP_32, 7), 4))]>;
+                                   (add (decimate (shl TTMP_32, 7), 4))],
+                                  RegSeq<15, 4, 8, "ttmp">.ret>;

 def TTMP_512Regs : RegisterTuples<getSubRegs<16>.ret,
                                   [(add (decimate TTMP_32, 4)),
  ...
                                    (add (decimate (shl TTMP_32, 12), 4)),
                                    (add (decimate (shl TTMP_32, 13), 4)),
                                    (add (decimate (shl TTMP_32, 14), 4)),
-                                   (add (decimate (shl TTMP_32, 15), 4))]>;
+                                   (add (decimate (shl TTMP_32, 15), 4))],
+                                  RegSeq<15, 4, 16, "ttmp">.ret>;

 class TmpRegTuplesBase <int index, int size,
                         list<Register> subRegs,
                         list<SubRegIndex> indices = getSubRegs<size>.ret,
                         int index1 = !add(index, !add(size, -1)),
                         string name = "ttmp["#index#":"#index1#"]"> :
-  SIRegisterWithSubRegs<name, subRegs> {
+  RegisterWithSubRegs<name, subRegs> {
   let HWEncoding = subRegs[0].HWEncoding;
   let SubRegIndices = indices;
 }
  ...
 // VGPR 64-bit registers
 def VGPR_64 : RegisterTuples<getSubRegs<2>.ret,
                              [(add (trunc VGPR_32, 255)),
-                              (add (shl VGPR_32, 1))]>;
+                              (add (shl VGPR_32, 1))],
+                             RegSeq<255, 1, 2, "v">.ret>;

 // VGPR 96-bit registers
 def VGPR_96 : RegisterTuples<getSubRegs<3>.ret,
                              [(add (trunc VGPR_32, 254)),
                               (add (shl VGPR_32, 1)),
-                              (add (shl VGPR_32, 2))]>;
+                              (add (shl VGPR_32, 2))],
+                             RegSeq<255, 1, 3, "v">.ret>;

 // VGPR 128-bit registers
 def VGPR_128 : RegisterTuples<getSubRegs<4>.ret,
                               [(add (trunc VGPR_32, 253)),
                                (add (shl VGPR_32, 1)),
                                (add (shl VGPR_32, 2)),
-                               (add (shl VGPR_32, 3))]>;
+                               (add (shl VGPR_32, 3))],
+                              RegSeq<255, 1, 4, "v">.ret>;

 // VGPR 160-bit registers
 def VGPR_160 : RegisterTuples<getSubRegs<5>.ret,
  ...
                                (add (shl VGPR_32, 1)),
                                (add (shl VGPR_32, 2)),
                                (add (shl VGPR_32, 3)),
-                               (add (shl VGPR_32, 4))]>;
+                               (add (shl VGPR_32, 4))],
+                              RegSeq<255, 1, 5, "v">.ret>;

 // VGPR 256-bit registers
 def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
  ...
                                (add (shl VGPR_32, 4)),
                                (add (shl VGPR_32, 5)),
                                (add (shl VGPR_32, 6)),
-                               (add (shl VGPR_32, 7))]>;
+                               (add (shl VGPR_32, 7))],
+                              RegSeq<255, 1, 8, "v">.ret>;

 // VGPR 512-bit registers
 def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
  ...
                                (add (shl VGPR_32, 12)),
                                (add (shl VGPR_32, 13)),
                                (add (shl VGPR_32, 14)),
-                               (add (shl VGPR_32, 15))]>;
+                               (add (shl VGPR_32, 15))],
+                              RegSeq<255, 1, 16, "v">.ret>;

 // VGPR 1024-bit registers
 def VGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
  ...
                                 (add (shl VGPR_32, 28)),
                                 (add (shl VGPR_32, 29)),
                                 (add (shl VGPR_32, 30)),
-                                (add (shl VGPR_32, 31))]>;
+                                (add (shl VGPR_32, 31))],
+                               RegSeq<255, 1, 32, "v">.ret>;

 // AccVGPR 32-bit registers
 def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
  ...
 // AGPR 64-bit registers
 def AGPR_64 : RegisterTuples<getSubRegs<2>.ret,
                              [(add (trunc AGPR_32, 255)),
-                              (add (shl AGPR_32, 1))]>;
+                              (add (shl AGPR_32, 1))],
+                             RegSeq<255, 1, 2, "a">.ret>;

 // AGPR 128-bit registers
 def AGPR_128 : RegisterTuples<getSubRegs<4>.ret,
                               [(add (trunc AGPR_32, 253)),
                                (add (shl AGPR_32, 1)),
                                (add (shl AGPR_32, 2)),
-                               (add (shl AGPR_32, 3))]>;
+                               (add (shl AGPR_32, 3))],
+                              RegSeq<255, 1, 4, "a">.ret>;

 // AGPR 512-bit registers
 def AGPR_512 : RegisterTuples<getSubRegs<16>.ret,
  ...
                                (add (shl AGPR_32, 12)),
                                (add (shl AGPR_32, 13)),
                                (add (shl AGPR_32, 14)),
-                               (add (shl AGPR_32, 15))]>;
+                               (add (shl AGPR_32, 15))],
+                              RegSeq<255, 1, 16, "a">.ret>;

 // AGPR 1024-bit registers
 def AGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
  ...
                                 (add (shl AGPR_32, 28)),
                                 (add (shl AGPR_32, 29)),
                                 (add (shl AGPR_32, 30)),
-                                (add (shl AGPR_32, 31))]>;
+                                (add (shl AGPR_32, 31))],
+                               RegSeq<255, 1, 32, "a">.ret>;

 //===----------------------------------------------------------------------===//
 // Register classes used as source and destination
  ...
 }

 def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
-                                    (add PRIVATE_RSRC_REG), Reg128> {
+                                    (add PRIVATE_RSRC_REG)> {
   let isAllocatable = 0;
   let CopyCost = -1;
 }
  ...
 }

 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
-                            (add SGPR_64Regs), Reg64> {
+                            (add SGPR_64Regs)> {
   let CopyCost = 1;
   let AllocationPriority = 11;
 }

 // CCR (call clobbered registers) SGPR 64-bit registers
 def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
-                                (add (trunc SGPR_64, 16)), Reg64> {
+                                (add (trunc SGPR_64, 16))> {
   let CopyCost = SGPR_64.CopyCost;
   let AllocationPriority = SGPR_64.AllocationPriority;
 }
  ...
 }

 def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
-  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA), Reg64> {
+  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
   let CopyCost = 1;
   let AllocationPriority = 13;
 }

 def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
-  (add SReg_64_XEXEC, EXEC), Reg64> {
+  (add SReg_64_XEXEC, EXEC)> {
   let CopyCost = 1;
   let AllocationPriority = 13;
 }
  ...
 // There are no 3-component scalar instructions, but this is needed
 // for symmetry with VGPRs.
 def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
-                            (add SGPR_96Regs), Reg96> {
+                            (add SGPR_96Regs)> {
   let AllocationPriority = 14;
 }

 def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
-                            (add SGPR_96), Reg96> {
+                            (add SGPR_96)> {
   let AllocationPriority = 14;
 }

 def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
-                             (add SGPR_128Regs), Reg128> {
+                             (add SGPR_128Regs)> {
   let AllocationPriority = 15;
 }

  ...
 }

 def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
-                             (add SGPR_128, TTMP_128), Reg128> {
+                             (add SGPR_128, TTMP_128)> {
   let AllocationPriority = 15;
 }

  ...
 // There are no 5-component scalar instructions, but this is needed
 // for symmetry with VGPRs.
 def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
-                             (add SGPR_160Regs), Reg160> {
+                             (add SGPR_160Regs)> {
   let AllocationPriority = 16;
 }

 def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
-                             (add SGPR_160), Reg160> {
+                             (add SGPR_160)> {
   let AllocationPriority = 16;
 }

-def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs),
-                             Reg256> {
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
   let AllocationPriority = 17;
 }

  ...
 }

 def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
-                             (add SGPR_256, TTMP_256), Reg256> {
+                             (add SGPR_256, TTMP_256)> {
   // Requires 4 s_mov_b64 to copy
   let CopyCost = 4;
   let AllocationPriority = 17;
 }

 def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
-                             (add SGPR_512Regs), Reg512> {
+                             (add SGPR_512Regs)> {
   let AllocationPriority = 18;
 }

  ...
 }

 def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
-                             (add SGPR_512, TTMP_512), Reg512> {
+                             (add SGPR_512, TTMP_512)> {
   // Requires 8 s_mov_b64 to copy
   let CopyCost = 8;
   let AllocationPriority = 18;
  ...
 }

 def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
-                              (add SGPR_1024Regs), Reg1024> {
+                              (add SGPR_1024Regs)> {
   let AllocationPriority = 19;
 }

 def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
-                              (add SGPR_1024), Reg1024> {
+                              (add SGPR_1024)> {
   let CopyCost = 16;
   let AllocationPriority = 19;
 }

 // Register class for all vector registers (VGPRs + Interploation Registers)
 def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
-                            (add VGPR_64), Reg64> {
+                            (add VGPR_64)> {
   let Size = 64;

   // Requires 2 v_mov_b32 to copy
  ...
   let AllocationPriority = 2;
 }

-def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> {
+def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> {
   let Size = 96;

   // Requires 3 v_mov_b32 to copy
  ...
 }

 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
-                             (add VGPR_128), Reg128> {
+                             (add VGPR_128)> {
   let Size = 128;

   // Requires 4 v_mov_b32 to copy
  ...
 }

 def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
-                             (add VGPR_160), Reg160> {
+                             (add VGPR_160)> {
   let Size = 160;

   // Requires 5 v_mov_b32 to copy
  ...
 }

 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
-                             (add VGPR_256), Reg256> {
+                             (add VGPR_256)> {
   let Size = 256;
   let CopyCost = 8;
   let AllocationPriority = 6;
 }

 def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
-                             (add VGPR_512), Reg512> {
+                             (add VGPR_512)> {
   let Size = 512;
   let CopyCost = 16;
   let AllocationPriority = 7;
 }

 def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
-                              (add VGPR_1024), Reg1024> {
+                              (add VGPR_1024)> {
   let Size = 1024;
   let CopyCost = 32;
   let AllocationPriority = 8;
 }

 def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
-                            (add AGPR_64), Reg64> {
+                            (add AGPR_64)> {
   let Size = 64;

   let CopyCost = 5;
  ...
 }

 def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
-                             (add AGPR_128), Reg128> {
+                             (add AGPR_128)> {
   let Size = 128;

   // Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr
  ...
 }

 def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
-                             (add AGPR_512), Reg512> {
+                             (add AGPR_512)> {
   let Size = 512;
   let CopyCost = 33;
   let AllocationPriority = 7;
 }

 def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
-                              (add AGPR_1024), Reg1024> {
+                              (add AGPR_1024)> {
   let Size = 1024;
   let CopyCost = 65;
   let AllocationPriority = 8;
  ...
   let isAllocatable = 0;
 }

-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64),
-                          Reg64> {
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
   let isAllocatable = 0;
 }

  ...
 }

 def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32,
-                          (add AReg_64, VReg_64), Reg64> {
+                          (add AReg_64, VReg_64)> {
   let isAllocatable = 0;
 }

     // Precompute some types.
     Record *RegisterCl = Def->getRecords().getClass("Register");
     RecTy *RegisterRecTy = RecordRecTy::get(RegisterCl);
-    StringInit *BlankName = StringInit::get("");
+    std::vector<StringRef> RegNames =
+      Def->getValueAsListOfStrings("RegAsmNames");

     // Zip them up.
     for (unsigned n = 0; n != Length; ++n) {
  ...
                               unsigned(Reg->getValueAsInt("CostPerUse")));
       }

+      StringInit *AsmName = StringInit::get("");
+      if (!RegNames.empty()) {
+        if (RegNames.size() <= n)
+          PrintFatalError(Def->getLoc(),
+                          "Register tuple definition missing name for '" +
+                          Name + "'.");
+        AsmName = StringInit::get(RegNames[n]);
+      }
+
       // Create a new Record representing the synthesized register. This record
       // is only for consumption by CodeGenRegister, it is not added to the
       // RecordKeeper.
  ...
         if (Field == "SubRegs")
           RV.setValue(ListInit::get(Tuple, RegisterRecTy));

-        // Provide a blank AsmName. MC hacks are required anyway.
         if (Field == "AsmName")
-          RV.setValue(BlankName);
+          RV.setValue(AsmName);

         // CostPerUse is aggregated from all Tuple members.
         if (Field == "CostPerUse")
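One practical consequence of the TableGen change above: the name list is only consulted when it is non-empty, and it then has to cover every synthesized tuple, otherwise tblgen stops with the "Register tuple definition missing name" fatal error instead of silently emitting a blank asm name. A hypothetical fragment like the following would trigger that error, assuming a class GPR32 with registers r0..r5 (so three pairs are produced but only two names are supplied); the names, indices and register class here are illustrative only.

    // Hypothetical misuse: three tuples are synthesized from r0..r5,
    // but only two asm names are given, so tblgen reports a fatal error
    // for the third tuple.
    def BadGPR64Tuples : RegisterTuples<[sub0, sub1],
                                        [(add (decimate GPR32, 2)),
                                         (add (decimate (shl GPR32, 1), 2))],
                                        ["r[0:1]", "r[2:3]"]>;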