llvm.org GIT mirror: llvm / commit cba2181
AMDGPU: Separate R600 and GCN TableGen files

Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that is
specific to itself. This will also help prevent the R600 sub-target
from slowing down new features for GCN, like disassembler support,
GlobalISel, etc.

Reviewers: arsenm, nhaehnle, jvesely

Reviewed By: arsenm

Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D46365

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@335942 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard, 2 years ago
63 changed files with 1,881 additions and 1,535 deletions.

include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
+include "AMDGPUFeatures.td"

//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//

-def FeatureFP64 : SubtargetFeature<"fp64",
-  "FP64",
-  "true",
-  "Enable double precision operations"
->;
-
-def FeatureFMA : SubtargetFeature<"fmaf",
-  "FMA",
-  "true",
-  "Enable single precision FMA (not as fast as mul+add, but fused)"
->;
-
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
  "FastFMAF32",
  "true",
...
  "HalfRate64Ops",
  "true",
  "Most fp64 instructions are half rate instead of quarter"
->;
-
-def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
-  "R600ALUInst",
-  "false",
-  "Older version of ALU instructions encoding"
->;
-
-def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
-  "HasVertexCache",
-  "true",
-  "Specify use of dedicated vertex cache"
->;
-
-def FeatureCaymanISA : SubtargetFeature<"caymanISA",
-  "CaymanISA",
-  "true",
-  "Use Cayman ISA"
->;
-
-def FeatureCFALUBug : SubtargetFeature<"cfalubug",
-  "CFALUBug",
-  "true",
-  "GPU has CF_ALU bug"
>;

def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
...
  "VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;

-class SubtargetFeatureFetchLimit <string Value> :
-                 SubtargetFeature <"fetch"#Value,
-  "TexVTXClauseSize",
-  Value,
-  "Limit the maximum number of fetches in a clause to "#Value
->;
-
-def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
-def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
-
-class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
-  "wavefrontsize"#Value,
-  "WavefrontSize",
-  !cast<string>(Value),
-  "The number of threads per wavefront"
->;
-
-def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
-def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
-def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
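The wavefront-size and fetch-limit defs above (being moved out of this file) are exposed to C++ as target feature strings such as "+wavefrontsize64" and "+fetch16". A minimal sketch of how such a string reaches the TableGen-generated parser; the helper function is hypothetical, but ParseSubtargetFeatures is the generated entry point declared later in this patch:

    // Hypothetical helper, for illustration only.
    static void applyR600DefaultFeatures(R600Subtarget &ST, StringRef GPU) {
      SmallString<64> FS("+wavefrontsize64,+fetch16");
      // Generated from the SubtargetFeature defs above; sets the
      // WavefrontSize and TexVTXClauseSize fields named in those defs.
      ST.ParseSubtargetFeatures(GPU, FS);
    }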
-
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
  "ldsbankcount"#Value,
  "LDSBankCount",
...
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

-class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
-  "localmemorysize"#Value,
-  "LocalMemorySize",
-  !cast<string>(Value),
-  "The size of local memory in bytes"
->;
-
-def FeatureGCN : SubtargetFeature<"gcn",
-  "IsGCN",
-  "true",
-  "GCN or newer GPU"
->;
-
def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
  "GCN3Encoding",
  "true",
...
  [FeatureFP64FP16Denormals]
>;

-def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
-  "DX10Clamp",
-  "true",
-  "clamp modifier clamps NaNs to 0.0"
->;
-
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
  "FPExceptions",
  "true",
...
  "DumpCode",
  "true",
  "Dump MachineInstrs in the CodeEmitter"
->;
-
-def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
-  "EnablePromoteAlloca",
-  "true",
-  "Enable promote alloca pass"
>;

// XXX - This should probably be removed once enabled by default
...
  "Dummy feature to disable assembler instructions"
>;

-class SubtargetFeatureGeneration <string Value,
+def FeatureGCN : SubtargetFeature<"gcn",
+  "IsGCN",
+  "true",
+  "GCN or newer GPU"
+>;
+
+class AMDGPUSubtargetFeatureGeneration <string Value,
                                        list<SubtargetFeature> Implies> :
-        SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
-                          Value#" GPU generation", Implies>;
-
-def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
-def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
-def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
-
-def FeatureR600 : SubtargetFeatureGeneration<"R600",
-  [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
->;
-
-def FeatureR700 : SubtargetFeatureGeneration<"R700",
-  [FeatureFetchLimit16, FeatureLocalMemorySize0]
->;
-
-def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
-  [FeatureFetchLimit16, FeatureLocalMemorySize32768]
->;
-
-def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
-  [FeatureFetchLimit16, FeatureWavefrontSize64,
-   FeatureLocalMemorySize32768]
->;
-
-def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
+  SubtargetFeatureGeneration <Value, "AMDGPUSubtarget", Implies>;
+
+def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
  [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
  FeatureWavefrontSize64, FeatureGCN,
  FeatureLDSBankCount32, FeatureMovrel]
>;

-def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
+def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
  FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
  FeatureCIInsts, FeatureMovrel]
>;

-def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
+def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
  FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
  FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
...
  ]
>;

-def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
+def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9",
  [FeatureFP64, FeatureLocalMemorySize65536,
  FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
  FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
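Each generation feature above writes the subtarget's Gen field, so C++ generation checks reduce to ordinary comparisons. An illustrative sketch (assuming an in-scope AMDGPUSubtarget &ST, which is not part of this hunk):

    // Illustrative only: a generation check driven by the defs above.
    bool useGCN3Encoding(const AMDGPUSubtarget &ST) {
      return ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
    }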
...
// Predicate helper class
//===----------------------------------------------------------------------===//

-def TruePredicate : Predicate<"true">;
-
def isSICI : Predicate<
  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
...
def EnableLateCFGStructurize : Predicate<
  "EnableLateStructurizeCFG">;

-// Exists to help track down where SubtargetPredicate isn't set rather
-// than letting tablegen crash with an unhelpful error.
-def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
-
-class PredicateControl {
-  Predicate SubtargetPredicate = InvalidPred;
-  Predicate SIAssemblerPredicate = isSICI;
-  Predicate VIAssemblerPredicate = isVI;
-  list<Predicate> AssemblerPredicates = [];
-  Predicate AssemblerPredicate = TruePredicate;
-  list<Predicate> OtherPredicates = [];
-  list<Predicate> Predicates = !listconcat([SubtargetPredicate,
-                                            AssemblerPredicate],
-                                           AssemblerPredicates,
-                                           OtherPredicates);
-}
-
-class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
-      PredicateControl;
-
-
// Include AMDGPU TD files
-include "R600Schedule.td"
-include "R600Processors.td"
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
+include "SIIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
+include "SIInstrInfo.td"
include "AMDGPUCallingConv.td"
include "AMDGPUSearchableTables.td"
...
  ]>>
]>;

-// Calling convention for R600
-def CC_R600 : CallingConv<[
-  CCIfInReg<CCIfType<[v4f32, v4i32], CCAssignToReg<[
-    T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
-    T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
-    T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
-    T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
-    T30_XYZW, T31_XYZW, T32_XYZW
-  ]>>>
-]>;
-
// Calling convention for compute kernels
def CC_AMDGPU_Kernel : CallingConv<[
  CCCustom<"allocateKernArg">
...
  CCIf<"static_cast<const AMDGPUSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
        "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
-       CCDelegateTo<CC_AMDGPU_Func>>,
-  CCIf<"static_cast<const AMDGPUSubtarget&>"
-        "(State.getMachineFunction().getSubtarget()).getGeneration() < "
-        "AMDGPUSubtarget::SOUTHERN_ISLANDS",
-       CCDelegateTo<CC_R600>>
+       CCDelegateTo<CC_AMDGPU_Func>>
]>;
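For reference, a handler named by CCCustom (here "allocateKernArg") must match LLVM's CCCustomFn shape from llvm/CodeGen/CallingConvLower.h. This is a sketch of the expected signature, not this patch's implementation:

    // Expected shape of a CCCustom handler (CCCustomFn).
    static bool allocateKernArg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                CCValAssign::LocInfo &LocInfo,
                                ISD::ArgFlagsTy &ArgFlags, CCState &State);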
+//===-- AMDGPUFeatures.td - AMDGPU Feature Definitions -----*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureFP64 : SubtargetFeature<"fp64",
+  "FP64",
+  "true",
+  "Enable double precision operations"
+>;
+
+def FeatureFMA : SubtargetFeature<"fmaf",
+  "FMA",
+  "true",
+  "Enable single precision FMA (not as fast as mul+add, but fused)"
+>;
+
+class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
+  "localmemorysize"#Value,
+  "LocalMemorySize",
+  !cast<string>(Value),
+  "The size of local memory in bytes"
+>;
+
+def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
+def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
+def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
+
+class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
+  "wavefrontsize"#Value,
+  "WavefrontSize",
+  !cast<string>(Value),
+  "The number of threads per wavefront"
+>;
+
+def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
+def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
+def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
+
+class SubtargetFeatureGeneration <string Value, string Subtarget,
+                                  list<SubtargetFeature> Implies> :
+        SubtargetFeature <Value, "Gen", Subtarget#"::"#Value,
+                          Value#" GPU generation", Implies>;
+
+def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
+  "DX10Clamp",
+  "true",
+  "clamp modifier clamps NaNs to 0.0"
+>;
+
+def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
+  "EnablePromoteAlloca",
+  "true",
+  "Enable promote alloca pass"
+>;
+
...
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

-  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
-  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
-  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
-                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
...
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+  const R600Subtarget *Subtarget;
+  AMDGPUAS AMDGPUASI;
+
+  bool isConstantLoad(const MemSDNode *N, int cbID) const;
+  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+                                       SDValue& Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
-      AMDGPUDAGToDAGISel(TM, OptLevel) {}
+      AMDGPUDAGToDAGISel(TM, OptLevel) {
+    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
+  }

  void Select(SDNode *N) override;

...
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+protected:
+  // Include the pieces autogenerated from the target description.
+#include "R600GenDAGISel.inc"
};

} // end anonymous namespace
...
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
-  const SIInstrInfo *TII
-      = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
+  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());
...
  SelectCode(N);
}

-bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
-  if (!N->readMem())
-    return false;
-  if (CbId == -1)
-    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
-           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
-
-  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
-}
-
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
...
// Complex Patterns
//===----------------------------------------------------------------------===//

-bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
-                                                         SDValue& IntPtr) {
-  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
-    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
-                                       true);
-    return true;
-  }
-  return false;
-}
-
-bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
-                                                         SDValue& BaseReg, SDValue &Offset) {
-  if (!isa<ConstantSDNode>(Addr)) {
-    BaseReg = Addr;
-    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
-    return true;
-  }
-  return false;
-}
-
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
...
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
-    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
-    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
...
  } while (IsModified);
}

+bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<R600Subtarget>();
+  return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+  if (!N->readMem())
+    return false;
+  if (CbId == -1)
+    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
+           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
+
+  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+                                                       SDValue& IntPtr) {
+  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
+                                       true);
+    return true;
+  }
+  return false;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+                                                       SDValue& BaseReg, SDValue &Offset) {
+  if (!isa<ConstantSDNode>(Addr)) {
+    BaseReg = Addr;
+    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
+    return true;
+  }
+  return false;
+}
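The division by 4 above appears to reflect that these constant offsets are counted in 32-bit dwords rather than bytes (compare the AMDGPUISD::DWORDADDR handling elsewhere in this file); that reading is an inference from the surrounding code, not stated by the patch:

    //   Addr   = ConstantSDNode(32)          // byte address
    //   IntPtr = getIntPtrConstant(32 / 4)   // dword index 8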
+
void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
...
  // pass. We want to avoid 128 bits copies as much as possible because they
  // can't be bundled by our scheduler.
  switch(NumVectorElts) {
-  case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
  case 4:
    if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
-      RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
+      RegClassID = R600::R600_Reg128VerticalRegClassID;
    else
-      RegClassID = AMDGPU::R600_Reg128RegClassID;
+      RegClassID = R600::R600_Reg128RegClassID;
    break;
  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
  }
...
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
-    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
-    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
...
      && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
-                                  AMDGPU::ZERO, MVT::i32);
+                                  R600::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
...
}

AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
-                                           const AMDGPUSubtarget &STI)
+                                           const AMDGPUCommonSubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
  // Lower floating point store/load to integer store/load to reduce the number
...
  setOperationAction(ISD::FLOG, MVT::f32, Custom);
  setOperationAction(ISD::FLOG10, MVT::f32, Custom);

-  if (Subtarget->has16BitInsts()) {
-    setOperationAction(ISD::FLOG, MVT::f16, Custom);
-    setOperationAction(ISD::FLOG10, MVT::f16, Custom);
-  }

  setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
  setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);

  setOperationAction(ISD::FREM, MVT::f32, Custom);
  setOperationAction(ISD::FREM, MVT::f64, Custom);
-
-  // v_mad_f32 does not support denormals according to some sources.
-  if (!Subtarget->hasFP32Denormals())
-    setOperationAction(ISD::FMAD, MVT::f32, Legal);

  // Expand to fneg + fadd.
  setOperationAction(ISD::FSUB, MVT::f64, Expand);
...
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);

-  if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
-    setOperationAction(ISD::FCEIL, MVT::f64, Custom);
-    setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
-    setOperationAction(ISD::FRINT, MVT::f64, Custom);
-    setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
-  }
-
-  if (!Subtarget->hasBFI()) {
-    // fcopysign can be done in a single instruction with BFI.
-    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
-    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
-  }
-
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
  setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
...
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }
-
-  if (!Subtarget->hasBCNT(32))
-    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-
-  if (!Subtarget->hasBCNT(64))
-    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  // The hardware supports 32-bit ROTR, but not ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
...
  setOperationAction(ISD::SMAX, MVT::i32, Legal);
  setOperationAction(ISD::UMAX, MVT::i32, Legal);

-  if (Subtarget->hasFFBH())
-    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
-
-  if (Subtarget->hasFFBL())
-    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
-
  setOperationAction(ISD::CTTZ, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
-
-  // We only really have 32-bit BFE instructions (and 16-bit on VI).
-  //
-  // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any
-  // effort to match them now. We want this to be false for i64 cases when the
-  // extraction isn't restricted to the upper or lower half. Ideally we would
-  // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that
-  // span the midpoint are probably relatively rare, so don't worry about them
-  // for now.
-  if (Subtarget->hasBFE())
-    setHasExtractBitsInsn(true);

  static const MVT::SimpleValueType VectorIntTypes[] = {
    MVT::v2i32, MVT::v4i32
...
  // vector compares until that is fixed.
  setHasMultipleConditionRegisters(true);

-  // SI at least has hardware support for floating point exceptions, but no way
-  // of using or handling them is implemented. They are also optional in OpenCL
-  // (Section 7.3)
-  setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
-
  PredictableSelectIsExpensive = false;

  // We want to find all load dependencies for long chains of stores to enable
...
{
  const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
  if (L->getMemOperand()->getAddrSpace()
-      == Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT)
+      == AMDGPUASI.CONSTANT_ADDRESS_32BIT)
    return true;
  return false;
}
...
  switch (IID) {
  case Intrinsic::amdgcn_mbcnt_lo:
  case Intrinsic::amdgcn_mbcnt_hi: {
+    const SISubtarget &ST =
+        DAG.getMachineFunction().getSubtarget<SISubtarget>();
    // These return at most the wavefront size - 1.
    unsigned Size = Op.getValueType().getSizeInBits();
-    Known.Zero.setHighBits(Size - Subtarget->getWavefrontSizeLog2());
+    Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
    break;
  }
  default:
...
namespace llvm {

class AMDGPUMachineFunction;
-class AMDGPUSubtarget;
+class AMDGPUCommonSubtarget;
struct ArgDescriptor;

class AMDGPUTargetLowering : public TargetLowering {
private:
+  const AMDGPUCommonSubtarget *Subtarget;
+
  /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
  /// legalized from a smaller type VT. Need to match pre-legalized type because
  /// the generic legalization inserts the add/sub between the select and
...
  static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);

protected:
-  const AMDGPUSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;

  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
...
  void analyzeFormalArgumentsCompute(CCState &State,
      const SmallVectorImpl<ISD::InputArg> &Ins) const;
public:
-  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
+  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI);

  bool mayIgnoreSignedZero(SDValue Op) const {
    if (getTargetMachine().Options.NoSignedZerosFPMath)
...
//===----------------------------------------------------------------------===//
//
/// \file
-/// Implementation of the TargetInstrInfo class that is common to all
+/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//
...

using namespace llvm;

-#define GET_INSTRINFO_CTOR_DTOR
-#include "AMDGPUGenInstrInfo.inc"
+// Pin the vtable to this file.
+//void AMDGPUInstrInfo::anchor() {}

-namespace llvm {
-namespace AMDGPU {
-#define GET_D16ImageDimIntrinsics_IMPL
-#define GET_ImageDimIntrinsicTable_IMPL
-#define GET_RsrcIntrinsics_IMPL
-#include "AMDGPUGenSearchableTables.inc"
-}
-}
+AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { }

-// Pin the vtable to this file.
-void AMDGPUInstrInfo::anchor() {}
-
-AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
-  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
-    ST(ST),
-    AMDGPUASI(ST.getAMDGPUAS()) {}
-
-// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
-// the first 16 loads will be interleaved with the stores, and the next 16 will
-// be clustered as expected. It should really split into 2 16 store batches.
-//
-// Loads are clustered until this returns false, rather than trying to schedule
-// groups of stores. This also means we have to deal with saying different
-// address space loads should be clustered, and ones which might cause bank
-// conflicts.
-//
-// This might be deprecated so it might not be worth that much effort to fix.
-bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
-                                              int64_t Offset0, int64_t Offset1,
-                                              unsigned NumLoads) const {
-  assert(Offset1 > Offset0 &&
-         "Second offset should be larger than first offset!");
-  // If we have less than 16 loads in a row, and the offsets are within 64
-  // bytes, then schedule together.
-
-  // A cacheline is 64 bytes (for global memory).
-  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
-}
67 // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
68 enum SIEncodingFamily {
69 SI = 0,
70 VI = 1,
71 SDWA = 2,
72 SDWA9 = 3,
73 GFX80 = 4,
74 GFX9 = 5
75 };
76
77 static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
78 switch (ST.getGeneration()) {
79 case AMDGPUSubtarget::SOUTHERN_ISLANDS:
80 case AMDGPUSubtarget::SEA_ISLANDS:
81 return SIEncodingFamily::SI;
82 case AMDGPUSubtarget::VOLCANIC_ISLANDS:
83 case AMDGPUSubtarget::GFX9:
84 return SIEncodingFamily::VI;
85
86 // FIXME: This should never be called for r600 GPUs.
87 case AMDGPUSubtarget::R600:
88 case AMDGPUSubtarget::R700:
89 case AMDGPUSubtarget::EVERGREEN:
90 case AMDGPUSubtarget::NORTHERN_ISLANDS:
91 return SIEncodingFamily::SI;
92 }
93
94 llvm_unreachable("Unknown subtarget generation!");
95 }
96
97 int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
98 SIEncodingFamily Gen = subtargetEncodingFamily(ST);
99
100 if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
101 ST.getGeneration() >= AMDGPUSubtarget::GFX9)
102 Gen = SIEncodingFamily::GFX9;
103
104 if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
105 Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
106 : SIEncodingFamily::SDWA;
107 // Adjust the encoding family to GFX80 for D16 buffer instructions when the
108 // subtarget has UnpackedD16VMem feature.
109 // TODO: remove this when we discard GFX80 encoding.
110 if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
111 Gen = SIEncodingFamily::GFX80;
112
113 int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
114
115 // -1 means that Opcode is already a native instruction.
116 if (MCOp == -1)
117 return Opcode;
118
119 // (uint16_t)-1 means that Opcode is a pseudo instruction that has
120 // no encoding in the given subtarget generation.
121 if (MCOp == (uint16_t)-1)
122 return -1;
123
124 return MCOp;
125 }
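A usage sketch for pseudoToMCOpcode (which this patch moves to the GCN side), following the -1 convention documented above; the surrounding variables TII, MI, and OutMI are hypothetical:

    // Sketch: remap a pseudo opcode before encoding.
    int MCOp = TII->pseudoToMCOpcode(MI->getOpcode());
    if (MCOp == -1)
      report_fatal_error("pseudo instruction has no encoding for this subtarget");
    OutMI.setOpcode(MCOp);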

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
...
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

-#define GET_INSTRINFO_HEADER
-#include "AMDGPUGenInstrInfo.inc"
-#undef GET_INSTRINFO_HEADER
-
namespace llvm {

class AMDGPUSubtarget;
...
class MachineInstr;
class MachineInstrBuilder;

-class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
-private:
-  const AMDGPUSubtarget &ST;
-
-  virtual void anchor();
-protected:
-  AMDGPUAS AMDGPUASI;
-
+class AMDGPUInstrInfo {
public:
  explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
-
-  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-                               int64_t Offset1, int64_t Offset2,
-                               unsigned NumLoads) const override;
-
-  /// Return a target-specific opcode if Opcode is a pseudo instruction.
-  /// Return -1 if the target-specific opcode for the pseudo instruction does
-  /// not exist. If Opcode is not a pseudo instruction, this is identity.
-  int pseudoToMCOpcode(int Opcode) const;

  static bool isUniformMMO(const MachineMemOperand *MMO);
};
...

  field bits<32> Inst = 0xffffffff;
}
+
+//===---------------------------------------------------------------------===//
+// Return instruction
+//===---------------------------------------------------------------------===//
+
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+     let Namespace = "AMDGPU";
+     dag OutOperandList = outs;
+     dag InOperandList = ins;
+     let Pattern = pattern;
+     let AsmString = !strconcat(asmstr, "\n");
+     let isPseudo = 1;
+     let Itinerary = NullALU;
+     bit hasIEEEFlag = 0;
+     bit hasZeroOpFlag = 0;
+     let mayLoad = 0;
+     let mayStore = 0;
+     let hasSideEffects = 0;
+     let isCodeGenOnly = 1;
+}
+
+def TruePredicate : Predicate<"true">;
+
+// Exists to help track down where SubtargetPredicate isn't set rather
+// than letting tablegen crash with an unhelpful error.
+def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
+
+class PredicateControl {
+  Predicate SubtargetPredicate = InvalidPred;
+  list<Predicate> AssemblerPredicates = [];
+  Predicate AssemblerPredicate = TruePredicate;
+  list<Predicate> OtherPredicates = [];
+  list<Predicate> Predicates = !listconcat([SubtargetPredicate,
+                                            AssemblerPredicate],
+                                           AssemblerPredicates,
+                                           OtherPredicates);
+}
+class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
+      PredicateControl;
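The !listconcat above flattens the four predicate sources into the single Predicates field that instruction selection checks. A rough C++ analog of that concatenation, with a hypothetical Predicate stand-in type:

    // Rough analog of the !listconcat semantics; Predicate is a stand-in.
    std::vector<Predicate> Preds = {SubtargetPredicate, AssemblerPredicate};
    Preds.insert(Preds.end(), AssemblerPredicates.begin(), AssemblerPredicates.end());
    Preds.insert(Preds.end(), OtherPredicates.begin(), OtherPredicates.end());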

def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
...
// Misc. PatFrags
//===----------------------------------------------------------------------===//

-class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
-  (ops node:$src0),
-  (op $src0),
-  [{ return N->hasOneUse(); }]
->;
-
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
...
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]
>;
-
-def trunc_oneuse : HasOneUseUnaryOp<trunc>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
...
  [{(void)N; return false;}]
>;

+//===----------------------------------------------------------------------===//
+// PatLeafs for Texture Constants
+//===----------------------------------------------------------------------===//
+
+def TEX_ARRAY : PatLeaf<
+  (imm),
+  [{uint32_t TType = (uint32_t)N->getZExtValue();
+    return TType == 9 || TType == 10 || TType == 16;
+  }]
+>;
+
+def TEX_RECT : PatLeaf<
+  (imm),
+  [{uint32_t TType = (uint32_t)N->getZExtValue();
+    return TType == 5;
+  }]
+>;
+
+def TEX_SHADOW : PatLeaf<
+  (imm),
+  [{uint32_t TType = (uint32_t)N->getZExtValue();
+    return (TType >= 6 && TType <= 8) || TType == 13;
+  }]
+>;
+
+def TEX_SHADOW_ARRAY : PatLeaf<
+  (imm),
+  [{uint32_t TType = (uint32_t)N->getZExtValue();
+    return TType == 11 || TType == 12 || TType == 17;
+  }]
+>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
...
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;
-
-include "R600Instructions.td"
-include "R700Instructions.td"
-include "EvergreenInstructions.td"
-include "CaymanInstructions.td"
-
-include "SIInstrInfo.td"
-
...
let TargetPrefix = "AMDGPU", isTarget = 1 in {
  def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
}
-
-include "SIIntrinsics.td"
...
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();
-  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(F);
  bool Changed = false;

  for (auto *U : F.users()) {
...
    if (!CI)
      continue;

-    Changed |= ST.makeLIDRangeMetadata(CI);
+    Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
  }
  return Changed;
}
...
  IsAMDGCN = TT.getArch() == Triple::amdgcn;
  IsAMDHSA = TT.getOS() == Triple::AMDHSA;

-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
  if (!ST.isPromoteAllocaEnabled())
    return false;

...

std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
-      *Builder.GetInsertBlock()->getParent());
+  const Function &F = *Builder.GetInsertBlock()->getParent();
+  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);

  if (!IsAMDHSA) {
    Function *LocalSizeYFn
...
}

Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
-      *Builder.GetInsertBlock()->getParent());
+  const AMDGPUCommonSubtarget &ST =
+      AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
  Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;

  switch (N) {
...
bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {

  FunctionType *FTy = F.getFunctionType();
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
...
  if (!SufficientLDS)
    return false;

-  const AMDGPUSubtarget &ST =
-      TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
+  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();
...

}

-include "R600RegisterInfo.td"
include "SIRegisterInfo.td"
...
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>
...
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "R600GenSubtargetInfo.inc"

AMDGPUSubtarget::~AMDGPUSubtarget() = default;
+
+R600Subtarget &
+R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
+                                               StringRef GPU, StringRef FS) {
+  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
+  FullFS += FS;
+  ParseSubtargetFeatures(GPU, FullFS);
+
+  // FIXME: I don't think Evergreen has any useful support for
+  // denormals, but should be checked. Should we issue a warning somewhere
+  // if someone tries to enable these?
+  if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) {
+    FP32Denormals = false;
+  }
+
+  HasMulU24 = getGeneration() >= EVERGREEN;
+  HasMulI24 = hasCaymanISA();
+
+  return *this;
+}
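The FullFS prefix above means caller-supplied features are appended after the R600 defaults, so later entries can still override them. For example, with FS = "+fp64":

    SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
    FullFS += "+fp64";  // caller-supplied FS
    // ParseSubtargetFeatures sees "+promote-alloca,+dx10-clamp,+fp64".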

AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
...
    HasMovrel = true;
  }

+  HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
+
  return *this;
}

+AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT,
+                                             const FeatureBitset &FeatureBits) :
+  TargetTriple(TT),
+  SubtargetFeatureBits(FeatureBits),
+  Has16BitInsts(false),
+  HasMadMixInsts(false),
+  FP32Denormals(false),
+  FPExceptions(false),
+  HasSDWA(false),
+  HasVOP3PInsts(false),
+  HasMulI24(true),
+  HasMulU24(true),
+  HasFminFmaxLegacy(true),
+  EnablePromoteAlloca(false),
+  LocalMemorySize(0),
+  WavefrontSize(0)
+  { }
+
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
-                                 const TargetMachine &TM)
-  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
+                                 const TargetMachine &TM) :
+    AMDGPUGenSubtargetInfo(TT, GPU, FS),
+    AMDGPUCommonSubtarget(TT, getFeatureBits()),
+    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
    TargetTriple(TT),
-    Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
+    Gen(SOUTHERN_ISLANDS),
    IsaVersion(ISAVersion0_0_0),
-    WavefrontSize(0),
-    LocalMemorySize(0),
    LDSBankCount(0),
    MaxPrivateElementSize(0),

    FastFMAF32(false),
    HalfRate64Ops(false),

-    FP32Denormals(false),
    FP64FP16Denormals(false),
-    FPExceptions(false),
    DX10Clamp(false),
    FlatForGlobal(false),
    AutoWaitcntBeforeBarrier(false),
...

    EnableHugePrivateBuffer(false),
    EnableVGPRSpilling(false),
-    EnablePromoteAlloca(false),
    EnableLoadStoreOpt(false),
    EnableUnsafeDSOffsetFolding(false),
    EnableSIScheduler(false),
...
    DumpCode(false),

    FP64(false),
-    FMA(false),
-    MIMG_R128(false),
-    IsGCN(false),
    GCN3Encoding(false),
    CIInsts(false),
    GFX9Insts(false),
    SGPRInitBug(false),
    HasSMemRealTime(false),
-    Has16BitInsts(false),
    HasIntClamp(false),
-    HasVOP3PInsts(false),
-    HasMadMixInsts(false),
    HasFmaMixInsts(false),
    HasMovrel(false),
    HasVGPRIndexMode(false),
    HasScalarStores(false),
    HasScalarAtomics(false),
    HasInv2PiInlineImm(false),
-    HasSDWA(false),
    HasSDWAOmod(false),
    HasSDWAScalar(false),
    HasSDWASdst(false),
...
    AddNoCarryInsts(false),
    HasUnpackedD16VMem(false),

-    R600ALUInst(false),
-    CaymanISA(false),
-    CFALUBug(false),
-    HasVertexCache(false),
-    TexVTXClauseSize(0),
    ScalarizeGlobal(false),

-    FeatureDisable(false),
-    InstrItins(getInstrItineraryForCPU(GPU)) {
+    FeatureDisable(false) {
  AS = AMDGPU::getAMDGPUAS(TT);
  initializeSubtargetDependencies(TT, GPU, FS);
}

-unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
+unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
                                                          const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
...
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
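Plugging hypothetical numbers into the formula above: with 64 KiB of LDS, MaxWaves = 10, two work groups per CU, and a requested occupancy of 10 waves:

    // 65536 * 10 / 2 / 10 = 32768 bytes of LDS available per work group.
    unsigned Budget = 65536 * 10 / 2 / 10;  // 32768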

-unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
+unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
                                                       const Function &F) const {
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
...
}

unsigned
-AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
+AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
  const auto *MFI = MF.getInfo<AMDGPUMachineFunction>();
  return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
}

std::pair<unsigned, unsigned>
-AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
+AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:
...
  }
}

-std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
+std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(
    const Function &F) const {
  // FIXME: 1024 if function.
  // Default minimum/maximum flat work group sizes.
...
  return Requested;
}

-std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
+std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(
    const Function &F) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
...
  return Requested;
}

-bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
+bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
...

R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
-  AMDGPUSubtarget(TT, GPU, FS, TM),
+  R600GenSubtargetInfo(TT, GPU, FS),
+  AMDGPUCommonSubtarget(TT, getFeatureBits()),
  InstrInfo(*this),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
-  TLInfo(TM, *this) {}
+  FMA(false),
+  CaymanISA(false),
+  CFALUBug(false),
+  DX10Clamp(false),
+  HasVertexCache(false),
+  R600ALUInst(false),
+  FP64(false),
+  TexVTXClauseSize(0),
+  Gen(R600),
+  TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
+  InstrItins(getInstrItineraryForCPU(GPU)),
+  AS (AMDGPU::getAMDGPUAS(TT)) { }

SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                         const GCNTargetMachine &TM)
...
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}
+
+const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) {
+  if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
+    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<AMDGPUSubtarget>());
+  else
+    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<R600Subtarget>());
+}
+
+const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) {
+  if (TM.getTargetTriple().getArch() == Triple::amdgcn)
+    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<AMDGPUSubtarget>(F));
+  else
+    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
+}
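Usage sketch: code that must handle both subtarget hierarchies (as AMDGPUPromoteAlloca does earlier in this patch) goes through these helpers instead of MF.getSubtarget<AMDGPUSubtarget>():

    // Sketch, assuming an in-scope MachineFunction MF.
    const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(MF);
    if (ST.isPromoteAllocaEnabled()) {
      // Common-subtarget queries work for both R600 and GCN here.
    }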
...

#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
+#define GET_SUBTARGETINFO_HEADER
+#include "R600GenSubtargetInfo.inc"

namespace llvm {

class StringRef;

-class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+class AMDGPUCommonSubtarget {
+private:
+  Triple TargetTriple;
+
+protected:
+  const FeatureBitset &SubtargetFeatureBits;
+  bool Has16BitInsts;
+  bool HasMadMixInsts;
+  bool FP32Denormals;
+  bool FPExceptions;
+  bool HasSDWA;
+  bool HasVOP3PInsts;
+  bool HasMulI24;
+  bool HasMulU24;
+  bool HasFminFmaxLegacy;
+  bool EnablePromoteAlloca;
+  int LocalMemorySize;
+  unsigned WavefrontSize;
+
+public:
+  AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+
+  static const AMDGPUCommonSubtarget &get(const MachineFunction &MF);
+  static const AMDGPUCommonSubtarget &get(const TargetMachine &TM,
+                                          const Function &F);
+
+  /// \returns Default range flat work group size for a calling convention.
+  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
+
+  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
+  /// for function \p F, or minimum/maximum flat work group sizes explicitly
+  /// requested using "amdgpu-flat-work-group-size" attribute attached to
+  /// function \p F.
+  ///
+  /// \returns Subtarget's default values if explicitly requested values cannot
+  /// be converted to integer, or violate subtarget's specifications.
+  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
+
+  /// \returns Subtarget's default pair of minimum/maximum number of waves per
+  /// execution unit for function \p F, or minimum/maximum number of waves per
+  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
+  /// attached to function \p F.
+  ///
+  /// \returns Subtarget's default values if explicitly requested values cannot
+  /// be converted to integer, violate subtarget's specifications, or are not
+  /// compatible with minimum/maximum number of waves limited by flat work group
+  /// size, register usage, and/or lds usage.
+  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
+
+  /// Return the amount of LDS that can be used that will not restrict the
+  /// occupancy lower than WaveCount.
+  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
+                                           const Function &) const;
+
+  /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
+  /// the given LDS memory size is the only constraint.
+  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
+
+  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
+
+  bool isAmdHsaOS() const {
+    return TargetTriple.getOS() == Triple::AMDHSA;
+  }
+
+  bool isAmdPalOS() const {
+    return TargetTriple.getOS() == Triple::AMDPAL;
+  }
+
+  bool has16BitInsts() const {
+    return Has16BitInsts;
+  }
+
+  bool hasMadMixInsts() const {
+    return HasMadMixInsts;
+  }
+
+  bool hasFP32Denormals() const {
+    return FP32Denormals;
+  }
+
+  bool hasFPExceptions() const {
+    return FPExceptions;
+  }
+
+  bool hasSDWA() const {
+    return HasSDWA;
+  }
+
+  bool hasVOP3PInsts() const {
+    return HasVOP3PInsts;
+  }
+
+  bool hasMulI24() const {
+    return HasMulI24;
+  }
+
+  bool hasMulU24() const {
+    return HasMulU24;
+  }
+
+  bool hasFminFmaxLegacy() const {
+    return HasFminFmaxLegacy;
+  }
+
+  bool isPromoteAllocaEnabled() const {
+    return EnablePromoteAlloca;
+  }
+
+  unsigned getWavefrontSize() const {
+    return WavefrontSize;
+  }
+
+  int getLocalMemorySize() const {
+    return LocalMemorySize;
+  }
+
+  unsigned getAlignmentForImplicitArgPtr() const {
+    return isAmdHsaOS() ? 8 : 4;
+  }
+
+  /// \returns Maximum number of work groups per compute unit supported by the
+  /// subtarget and limited by given \p FlatWorkGroupSize.
+  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
+    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
+                                                  FlatWorkGroupSize);
+  }
+
+  /// \returns Minimum flat work group size supported by the subtarget.
+  unsigned getMinFlatWorkGroupSize() const {
+    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
+  }
+
+  /// \returns Maximum flat work group size supported by the subtarget.
+  unsigned getMaxFlatWorkGroupSize() const {
+    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
+  }
+
+  /// \returns Maximum number of waves per execution unit supported by the
+  /// subtarget and limited by given \p FlatWorkGroupSize.
+  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
+    return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
+                                             FlatWorkGroupSize);
+  }
+
+  /// \returns Minimum number of waves per execution unit supported by the
+  /// subtarget.
+  unsigned getMinWavesPerEU() const {
+    return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
+  }
+
+  unsigned getMaxWavesPerEU() const { return 10; }
+
+  /// Creates value range metadata on a workitemid.* intrinsic call or load.
+  bool makeLIDRangeMetadata(Instruction *I) const;
+
+  virtual ~AMDGPUCommonSubtarget() {}
+};
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo,
+                        public AMDGPUCommonSubtarget {
public:
  enum Generation {
-    R600 = 0,
-    R700,
-    EVERGREEN,
-    NORTHERN_ISLANDS,
-    SOUTHERN_ISLANDS,
-    SEA_ISLANDS,
-    VOLCANIC_ISLANDS,
-    GFX9,
+    // Gap for R600 generations, so we can do comparisons between
+    // AMDGPUSubtarget and R600Subtarget.
+    SOUTHERN_ISLANDS = 4,
+    SEA_ISLANDS = 5,
+    VOLCANIC_ISLANDS = 6,
+    GFX9 = 7,
  };
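The numbering gap leaves room for the four R600 generations (presumably values 0 through 3 on the R600Subtarget side), so ordered comparisons stay valid across the two hierarchies:

    // Still meaningful whichever concrete subtarget is behind ST:
    //   ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS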

  enum {
...
    LLVMTrapHandlerRegValue = 1
  };

+private:
+  SIFrameLowering FrameLowering;
+
+  /// GlobalISel related APIs.
+  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+  std::unique_ptr<InstructionSelector> InstSelector;
+  std::unique_ptr<LegalizerInfo> Legalizer;
+  std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
protected:
  // Basic subtarget description.
  Triple TargetTriple;
-  Generation Gen;
+  unsigned Gen;
  unsigned IsaVersion;
-  unsigned WavefrontSize;
-  int LocalMemorySize;
  int LDSBankCount;
  unsigned MaxPrivateElementSize;

...
  bool HalfRate64Ops;

  // Dynamically set bits that enable features.
-  bool FP32Denormals;
  bool FP64FP16Denormals;
-  bool FPExceptions;
  bool DX10Clamp;
  bool FlatForGlobal;
  bool AutoWaitcntBeforeBarrier;
...
  // Used as options.
  bool EnableHugePrivateBuffer;
  bool EnableVGPRSpilling;
-  bool EnablePromoteAlloca;
  bool EnableLoadStoreOpt;
  bool EnableUnsafeDSOffsetFolding;
  bool EnableSIScheduler;
...
  bool GFX9Insts;
  bool SGPRInitBug;
  bool HasSMemRealTime;
-  bool Has16BitInsts;
  bool HasIntClamp;
-  bool HasVOP3PInsts;
-  bool HasMadMixInsts;
  bool HasFmaMixInsts;
  bool HasMovrel;
  bool HasVGPRIndexMode;
  bool HasScalarStores;
  bool HasScalarAtomics;
  bool HasInv2PiInlineImm;
-  bool HasSDWA;
  bool HasSDWAOmod;
  bool HasSDWAScalar;
  bool HasSDWASdst;
...
  // Dummy feature to use for assembler in tablegen.
  bool FeatureDisable;

-  InstrItineraryData InstrItins;
  SelectionDAGTargetInfo TSInfo;
  AMDGPUAS AS;

...
  AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
                                                   StringRef GPU, StringRef FS);

-  const AMDGPUInstrInfo *getInstrInfo() const override = 0;
-  const AMDGPUFrameLowering *getFrameLowering() const override = 0;
-  const AMDGPUTargetLowering *getTargetLowering() const override = 0;
-  const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
-
-  const InstrItineraryData *getInstrItineraryData() const override {
-    return &InstrItins;
+  virtual const SIInstrInfo *getInstrInfo() const override = 0;
+
+  const SIFrameLowering *getFrameLowering() const override {
+    return &FrameLowering;
+  }
+
+  virtual const SITargetLowering *getTargetLowering() const override = 0;
+
+  virtual const SIRegisterInfo *getRegisterInfo() const override = 0;
+
+  const CallLowering *getCallLowering() const override {
+    return CallLoweringInfo.get();
+  }
+
+  const InstructionSelector *getInstructionSelector() const override {
+    return InstSelector.get();
+  }
+
+  const LegalizerInfo *getLegalizerInfo() const override {
+    return Legalizer.get();
+  }
+
+  const RegisterBankInfo *getRegBankInfo() const override {
+    return RegBankInfo.get();
  }

  // Nothing implemented, just prevent crashes on use.
...

  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);

-  bool isAmdHsaOS() const {
-    return TargetTriple.getOS() == Triple::AMDHSA;
-  }
-
  bool isMesa3DOS() const {
    return TargetTriple.getOS() == Triple::Mesa3D;
  }

-  bool isAmdPalOS() const {
-    return TargetTriple.getOS() == Triple::AMDPAL;
-  }
-
  Generation getGeneration() const {
-    return Gen;
-  }
-
-  unsigned getWavefrontSize() const {
-    return WavefrontSize;
+    return (Generation)Gen;
  }

  unsigned getWavefrontSizeLog2() const {
    return Log2_32(WavefrontSize);
  }

-  int getLocalMemorySize() const {
-    return LocalMemorySize;
-  }
-
  int getLDSBankCount() const {
    return LDSBankCount;
  }
...
    return AS;
  }

-  bool has16BitInsts() const {
-    return Has16BitInsts;
-  }
-
  bool hasIntClamp() const {
    return HasIntClamp;
  }

-  bool hasVOP3PInsts() const {
-    return HasVOP3PInsts;
-  }
-
  bool hasFP64() const {
    return FP64;
  }
...
    return MIMG_R128;
  }

+  bool hasHWFP64() const {
+    return FP64;
+  }
+
  bool hasFastFMAF32() const {
    return FastFMAF32;
  }
...
  }

  bool hasAddr64() const {
-    return (getGeneration() < VOLCANIC_ISLANDS);
+    return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
  }

  bool hasBFE() const {
-    return (getGeneration() >= EVERGREEN);
+    return true;
  }

  bool hasBFI() const {
-    return (getGeneration() >= EVERGREEN);
+    return true;
  }

  bool hasBFM() const {
...
  }

  bool hasBCNT(unsigned Size) const {
-    if (Size == 32)
-      return (getGeneration() >= EVERGREEN);
-
-    if (Size == 64)
-      return (getGeneration() >= SOUTHERN_ISLANDS);
-
-    return false;
-  }
-
-  bool hasMulU24() const {
-    return (getGeneration() >= EVERGREEN);
-  }
-
-  bool hasMulI24() const {
-    return (getGeneration() >= SOUTHERN_ISLANDS ||
-           hasCaymanISA());
+    return true;
  }

  bool hasFFBL() const {
-    return (getGeneration() >= EVERGREEN);
+    return true;
  }

  bool hasFFBH() const {
-    return (getGeneration() >= EVERGREEN);
+    return true;
  }

  bool hasMed3_16() const {
-    return getGeneration() >= GFX9;
+    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  bool hasMin3Max3_16() const {
-    return getGeneration() >= GFX9;
-  }
-
-  bool hasMadMixInsts() const {
-    return HasMadMixInsts;
+    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  bool hasFmaMixInsts() const {
...
  }

  bool hasCARRY() const {
-    return (getGeneration() >= EVERGREEN);
-  }
-
-  bool hasBORROW() const {
-    return (getGeneration() >= EVERGREEN);
-  }
-
-  bool hasCaymanISA() const {
-    return CaymanISA;
+    return true;
  }

  bool hasFMA() const {
...

  bool enableHugePrivateBuffer() const {
    return EnableHugePrivateBuffer;
-  }
-
-  bool isPromoteAllocaEnabled() const {
-    return EnablePromoteAlloca;
  }

  bool unsafeDSOffsetFoldingEnabled() const {
...
  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
                                           const Function &) const;

-  /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
-  /// the given LDS memory size is the only constraint.
-  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
-
-  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
-
  bool hasFP16Denormals() const {
    return FP64FP16Denormals;
  }

-  bool hasFP32Denormals() const {
-    return FP32Denormals;
-  }
-
  bool hasFP64Denormals() const {
    return FP64FP16Denormals;
  }

  bool supportsMinMaxDenormModes() const {
    return getGeneration() >= AMDGPUSubtarget::GFX9;
-  }
-
-  bool hasFPExceptions() const {
-    return FPExceptions;
  }

  bool enableDX10Clamp() const {
...
  }

  bool hasApertureRegs() const {
-   return HasApertureRegs;
+    return HasApertureRegs;
  }

  bool isTrapHandlerEnabled() const {
511621
512 bool hasFminFmaxLegacy() const {
513 return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
514 }
515
516 bool hasSDWA() const {
517 return HasSDWA;
518 }
519
520622 bool hasSDWAOmod() const {
521623 return HasSDWAOmod;
522624 }
553655 /// of the first explicit kernel argument.
554656 unsigned getExplicitKernelArgOffset(const Function &F) const {
555657 return isAmdCodeObjectV2(F) ? 0 : 36;
556 }
557
558 unsigned getAlignmentForImplicitArgPtr() const {
559 return isAmdHsaOS() ? 8 : 4;
560658 }
561659
562660 /// \returns Number of bytes of arguments that are passed to a shader or
587685 return true;
588686 }
589687
590 void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
591 bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
688 void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
689 bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
592690
593691 /// \returns Number of execution units per compute unit supported by the
594692 /// subtarget.
595693 unsigned getEUsPerCU() const {
596 return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
597 }
598
599 /// \returns Maximum number of work groups per compute unit supported by the
600 /// subtarget and limited by given \p FlatWorkGroupSize.
601 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
602 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
603 FlatWorkGroupSize);
694 return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
604695 }
605696
606697 /// \returns Maximum number of waves per compute unit supported by the
607698 /// subtarget without any kind of limitation.
608699 unsigned getMaxWavesPerCU() const {
609 return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
700 return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
610701 }
611702
612703 /// \returns Maximum number of waves per compute unit supported by the
613704 /// subtarget and limited by given \p FlatWorkGroupSize.
614705 unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
615 return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
706 return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
616707 FlatWorkGroupSize);
617 }
618
619 /// \returns Minimum number of waves per execution unit supported by the
620 /// subtarget.
621 unsigned getMinWavesPerEU() const {
622 return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
623708 }
624709
625710 /// \returns Maximum number of waves per execution unit supported by the
626711 /// subtarget without any kind of limitation.
627712 unsigned getMaxWavesPerEU() const {
628 return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
629 }
630
631 /// \returns Maximum number of waves per execution unit supported by the
632 /// subtarget and limited by given \p FlatWorkGroupSize.
633 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
634 return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
635 FlatWorkGroupSize);
636 }
637
638 /// \returns Minimum flat work group size supported by the subtarget.
639 unsigned getMinFlatWorkGroupSize() const {
640 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
641 }
642
643 /// \returns Maximum flat work group size supported by the subtarget.
644 unsigned getMaxFlatWorkGroupSize() const {
645 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
713 return AMDGPU::IsaInfo::getMaxWavesPerEU();
646714 }
647715
648716 /// \returns Number of waves per work group supported by the subtarget and
649717 /// limited by given \p FlatWorkGroupSize.
650718 unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
651 return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
652 FlatWorkGroupSize);
653 }
654
655 /// \returns Default range flat work group size for a calling convention.
656 std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
657
658 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
659 /// for function \p F, or minimum/maximum flat work group sizes explicitly
660 /// requested using "amdgpu-flat-work-group-size" attribute attached to
661 /// function \p F.
662 ///
663 /// \returns Subtarget's default values if explicitly requested values cannot
664 /// be converted to integer, or violate subtarget's specifications.
665 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
666
667 /// \returns Subtarget's default pair of minimum/maximum number of waves per
668 /// execution unit for function \p F, or minimum/maximum number of waves per
669 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
670 /// attached to function \p F.
671 ///
672 /// \returns Subtarget's default values if explicitly requested values cannot
673 /// be converted to integer, violate subtarget's specifications, or are not
674 /// compatible with minimum/maximum number of waves limited by flat work group
675 /// size, register usage, and/or lds usage.
676 std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
677
678 /// Creates value range metadata on a workitemid.* intrinsic call or load.
679 bool makeLIDRangeMetadata(Instruction *I) const;
680 };
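Worth spelling out why so many predicates above collapse to `return true`: once the R600 generations move into their own class, the GCN-side hierarchy starts at SOUTHERN_ISLANDS, so the old `>= EVERGREEN` checks become vacuous. A minimal standalone sketch of the before/after shape (names are illustrative, not the in-tree classes):

#include <cstdio>

// Before the split: one class, every query compares generations.
struct SharedSubtarget {
  enum Generation { R600, R700, EVERGREEN, SOUTHERN_ISLANDS, GFX9 };
  Generation Gen;
  bool hasBFE() const { return Gen >= EVERGREEN; }   // must consider R600
};

// After the split: the GCN-only class starts at SOUTHERN_ISLANDS, so the
// EVERGREEN comparison is always true and folds away.
struct GCNOnlySubtarget {
  bool hasBFE() const { return true; }               // no R600 cases left
};

int main() {
  SharedSubtarget Old{SharedSubtarget::R600};
  GCNOnlySubtarget New;
  std::printf("old R600 hasBFE: %d, new GCN hasBFE: %d\n",
              Old.hasBFE(), New.hasBFE());
}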
681
682 class R600Subtarget final : public AMDGPUSubtarget {
683 private:
684 R600InstrInfo InstrInfo;
685 R600FrameLowering FrameLowering;
686 R600TargetLowering TLInfo;
687
688 public:
689 R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
690 const TargetMachine &TM);
691
692 const R600InstrInfo *getInstrInfo() const override {
693 return &InstrInfo;
694 }
695
696 const R600FrameLowering *getFrameLowering() const override {
697 return &FrameLowering;
698 }
699
700 const R600TargetLowering *getTargetLowering() const override {
701 return &TLInfo;
702 }
703
704 const R600RegisterInfo *getRegisterInfo() const override {
705 return &InstrInfo.getRegisterInfo();
706 }
707
708 bool hasCFAluBug() const {
709 return CFALUBug;
710 }
711
712 bool hasVertexCache() const {
713 return HasVertexCache;
714 }
715
716 short getTexVTXClauseSize() const {
717 return TexVTXClauseSize;
719 return AMDGPU::IsaInfo::getWavesPerWorkGroup(
720 MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
718721 }
719722 };
720723
765768 const SIRegisterInfo *getRegisterInfo() const override {
766769 return &InstrInfo.getRegisterInfo();
767770 }
771 // static wrappers
772 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
768773
769774 // XXX - Why is this here if it isn't in the default pass set?
770775 bool enableEarlyIfConversion() const override {
774779 void overrideSchedPolicy(MachineSchedPolicy &Policy,
775780 unsigned NumRegionInstrs) const override;
776781
777 bool isVGPRSpillingEnabled(const Function& F) const;
782 bool isVGPRSpillingEnabled(const Function &F) const;
778783
779784 unsigned getMaxNumUserSGPRs() const {
780785 return 16;
859864 unsigned getKernArgSegmentSize(const Function &F,
860865 unsigned ExplictArgBytes) const;
861866
862 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
867 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
868 /// SGPRs
863869 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
864870
865 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
871 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
872 /// VGPRs
866873 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
867874
868875 /// \returns true if the flat_scratch register should be initialized with the
869876 /// pointer to the wave's scratch memory rather than a size and offset.
870877 bool flatScratchIsPointer() const {
871 return getGeneration() >= GFX9;
878 return getGeneration() >= AMDGPUSubtarget::GFX9;
872879 }
873880
874881 /// \returns true if the machine has merged shaders in which s0-s7 are
879886
880887 /// \returns SGPR allocation granularity supported by the subtarget.
881888 unsigned getSGPRAllocGranule() const {
882 return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
889 return AMDGPU::IsaInfo::getSGPRAllocGranule(
890 MCSubtargetInfo::getFeatureBits());
883891 }
884892
885893 /// \returns SGPR encoding granularity supported by the subtarget.
886894 unsigned getSGPREncodingGranule() const {
887 return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
895 return AMDGPU::IsaInfo::getSGPREncodingGranule(
896 MCSubtargetInfo::getFeatureBits());
888897 }
889898
890899 /// \returns Total number of SGPRs supported by the subtarget.
891900 unsigned getTotalNumSGPRs() const {
892 return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
901 return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
893902 }
894903
895904 /// \returns Addressable number of SGPRs supported by the subtarget.
896905 unsigned getAddressableNumSGPRs() const {
897 return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
906 return AMDGPU::IsaInfo::getAddressableNumSGPRs(
907 MCSubtargetInfo::getFeatureBits());
898908 }
899909
900910 /// \returns Minimum number of SGPRs that meets the given number of waves per
901911 /// execution unit requirement supported by the subtarget.
902912 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
903 return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
913 return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
914 WavesPerEU);
904915 }
905916
906917 /// \returns Maximum number of SGPRs that meets the given number of waves per
907918 /// execution unit requirement supported by the subtarget.
908919 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
909 return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
910 Addressable);
920 return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
921 WavesPerEU, Addressable);
911922 }
912923
913924 /// \returns Reserved number of SGPRs for given function \p MF.
925936
926937 /// \returns VGPR allocation granularity supported by the subtarget.
927938 unsigned getVGPRAllocGranule() const {
928 return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
939 return AMDGPU::IsaInfo::getVGPRAllocGranule(
940 MCSubtargetInfo::getFeatureBits());
929941 }
930942
931943 /// \returns VGPR encoding granularity supported by the subtarget.
932944 unsigned getVGPREncodingGranule() const {
933 return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
945 return AMDGPU::IsaInfo::getVGPREncodingGranule(
946 MCSubtargetInfo::getFeatureBits());
934947 }
935948
936949 /// \returns Total number of VGPRs supported by the subtarget.
937950 unsigned getTotalNumVGPRs() const {
938 return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
951 return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
939952 }
940953
941954 /// \returns Addressable number of VGPRs supported by the subtarget.
942955 unsigned getAddressableNumVGPRs() const {
943 return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
956 return AMDGPU::IsaInfo::getAddressableNumVGPRs(
957 MCSubtargetInfo::getFeatureBits());
944958 }
945959
946960 /// \returns Minimum number of VGPRs that meets given number of waves per
947961 /// execution unit requirement supported by the subtarget.
948962 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
949 return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
963 return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
964 WavesPerEU);
950965 }
951966
952967 /// \returns Maximum number of VGPRs that meets given number of waves per
953968 /// execution unit requirement supported by the subtarget.
954969 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
955 return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
970 return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
971 WavesPerEU);
956972 }
957973
958974 /// \returns Maximum number of VGPRs that meets number of waves per execution
970986 const override;
971987 };
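The repeated `MCSubtargetInfo::getFeatureBits()` qualification above is presumably the usual cure for member-name lookup trouble once a subtarget class sits under both a TableGen-generated `*GenSubtargetInfo` (itself an MCSubtargetInfo) and a new common AMDGPU base. A reduced sketch of that pattern, with hypothetical class names:

#include <cstdio>

struct FeatureBits { unsigned Bits = 0; };

// Stand-in for the generated base, which provides getFeatureBits().
struct GenSubtargetInfo {
  FeatureBits FB{0x2};
  const FeatureBits &getFeatureBits() const { return FB; }
};

// Stand-in for a common base that exposes the same member name.
struct CommonSubtarget {
  FeatureBits FB{0x1};
  const FeatureBits &getFeatureBits() const { return FB; }
};

struct Subtarget : GenSubtargetInfo, CommonSubtarget {
  unsigned totalSGPRs() const {
    // An unqualified getFeatureBits() would be ambiguous here; qualifying
    // with the intended base picks the generated tables, the same way the
    // patch writes MCSubtargetInfo::getFeatureBits().
    return GenSubtargetInfo::getFeatureBits().Bits * 100;
  }
};

int main() { std::printf("%u\n", Subtarget().totalSGPRs()); }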
972988
989
990 class R600Subtarget final : public R600GenSubtargetInfo,
991 public AMDGPUCommonSubtarget {
992 public:
993 enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
994
995 private:
996 R600InstrInfo InstrInfo;
997 R600FrameLowering FrameLowering;
998 bool FMA;
999 bool CaymanISA;
1000 bool CFALUBug;
1001 bool DX10Clamp;
1002 bool HasVertexCache;
1003 bool R600ALUInst;
1004 bool FP64;
1005 short TexVTXClauseSize;
1006 Generation Gen;
1007 R600TargetLowering TLInfo;
1008 InstrItineraryData InstrItins;
1009 SelectionDAGTargetInfo TSInfo;
1010 AMDGPUAS AS;
1011
1012 public:
1013 R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
1014 const TargetMachine &TM);
1015
1016 const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
1017
1018 const R600FrameLowering *getFrameLowering() const override {
1019 return &FrameLowering;
1020 }
1021
1022 const R600TargetLowering *getTargetLowering() const override {
1023 return &TLInfo;
1024 }
1025
1026 const R600RegisterInfo *getRegisterInfo() const override {
1027 return &InstrInfo.getRegisterInfo();
1028 }
1029
1030 const InstrItineraryData *getInstrItineraryData() const override {
1031 return &InstrItins;
1032 }
1033
1034 // Nothing implemented, just prevent crashes on use.
1035 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
1036 return &TSInfo;
1037 }
1038
1039 void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1040
1041 Generation getGeneration() const {
1042 return Gen;
1043 }
1044
1045 unsigned getStackAlignment() const {
1046 return 4;
1047 }
1048
1049 R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1050 StringRef GPU, StringRef FS);
1051
1052 bool hasBFE() const {
1053 return (getGeneration() >= EVERGREEN);
1054 }
1055
1056 bool hasBFI() const {
1057 return (getGeneration() >= EVERGREEN);
1058 }
1059
1060 bool hasBCNT(unsigned Size) const {
1061 if (Size == 32)
1062 return (getGeneration() >= EVERGREEN);
1063
1064 return false;
1065 }
1066
1067 bool hasBORROW() const {
1068 return (getGeneration() >= EVERGREEN);
1069 }
1070
1071 bool hasCARRY() const {
1072 return (getGeneration() >= EVERGREEN);
1073 }
1074
1075 bool hasCaymanISA() const {
1076 return CaymanISA;
1077 }
1078
1079 bool hasFFBL() const {
1080 return (getGeneration() >= EVERGREEN);
1081 }
1082
1083 bool hasFFBH() const {
1084 return (getGeneration() >= EVERGREEN);
1085 }
1086
1087 bool hasFMA() const { return FMA; }
1088
1089 unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
1090 return 36;
1091 }
1092
1093 bool hasCFAluBug() const { return CFALUBug; }
1094
1095 bool hasVertexCache() const { return HasVertexCache; }
1096
1097 short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1098
1099 AMDGPUAS getAMDGPUAS() const { return AS; }
1100
1101 bool enableMachineScheduler() const override {
1102 return true;
1103 }
1104
1105 bool enableSubRegLiveness() const override {
1106 return true;
1107 }
1108 };
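The standalone R600Subtarget keeps its own four-entry Generation enum, so generation gating survives on this side but now tops out at NORTHERN_ISLANDS. An illustrative reduction of the hasBCNT logic above (not the in-tree class):

#include <cstdio>

struct R600Like {
  enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
  Generation Gen;

  bool hasBCNT(unsigned Size) const {
    if (Size == 32)
      return Gen >= EVERGREEN;   // popcount exists from Evergreen on
    return false;                // no 64-bit variant on R600 hardware
  }
};

int main() {
  R600Like EG{R600Like::EVERGREEN};
  std::printf("bcnt32: %d, bcnt64: %d\n", EG.hasBCNT(32), EG.hasBCNT(64));
}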
1109
9731110 } // end namespace llvm
9741111
9751112 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
3333 class AMDGPUTargetMachine : public LLVMTargetMachine {
3434 protected:
3535 std::unique_ptr<TargetLoweringObjectFile> TLOF;
36 AMDGPUIntrinsicInfo IntrinsicInfo;
3736 AMDGPUAS AS;
3837
3938 StringRef getGPUName(const Function &F) const;
4847 CodeGenOpt::Level OL);
4948 ~AMDGPUTargetMachine() override;
5049
51 const AMDGPUSubtarget *getSubtargetImpl() const;
52 const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override = 0;
53
54 const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
55 return &IntrinsicInfo;
56 }
50 const TargetSubtargetInfo *getSubtargetImpl() const;
51 const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override = 0;
5752
5853 TargetLoweringObjectFile *getObjFileLowering() const override {
5954 return TLOF.get();
10297
10398 class GCNTargetMachine final : public AMDGPUTargetMachine {
10499 private:
100 AMDGPUIntrinsicInfo IntrinsicInfo;
105101 mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
106102
107103 public:
116112
117113 TargetTransformInfo getTargetTransformInfo(const Function &F) override;
118114
115 const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
116 return &IntrinsicInfo;
117 }
118
119119 bool useIPRA() const override {
120120 return true;
121121 }
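getSubtargetImpl now promises only the neutral TargetSubtargetInfo base, and each concrete target machine hands back its own subtarget; the callers that know which machine they belong to downcast, as the TTI constructors further down do. A reduced sketch of that dispatch, with made-up names:

#include <cstdio>
#include <memory>

struct SubtargetBase { virtual ~SubtargetBase() = default; };
struct R600ST : SubtargetBase { int texClauseSize = 16; };
struct GCNST  : SubtargetBase { int wavefrontSize = 64; };

enum class Arch { R600, GCN };

// The shared interface only returns the base class; the caller recovers
// the concrete type it knows it created.
std::unique_ptr<SubtargetBase> makeSubtarget(Arch A) {
  if (A == Arch::R600)
    return std::make_unique<R600ST>();
  return std::make_unique<GCNST>();
}

int main() {
  auto ST = makeSubtarget(Arch::GCN);
  // Safe because this code path only ever builds a GCNST.
  auto *GCN = static_cast<GCNST *>(ST.get());
  std::printf("wavefront %d\n", GCN->wavefrontSize);
}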
101101 unsigned ThresholdPrivate = UnrollThresholdPrivate;
102102 unsigned ThresholdLocal = UnrollThresholdLocal;
103103 unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
104 AMDGPUAS ASST = ST->getAMDGPUAS();
104 const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);
105105 for (const BasicBlock *BB : L->getBlocks()) {
106106 const DataLayout &DL = BB->getModule()->getDataLayout();
107107 unsigned LocalGEPsSeen = 0;
4444
4545 friend BaseT;
4646
47 const AMDGPUSubtarget *ST;
48 const AMDGPUTargetLowering *TLI;
47 Triple TargetTriple;
4948
5049 public:
5150 explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
5251 : BaseT(TM, F.getParent()->getDataLayout()),
53 ST(TM->getSubtargetImpl(F)),
54 TLI(ST->getTargetLowering()) {}
55
56 const AMDGPUSubtarget *getST() const { return ST; }
57 const AMDGPUTargetLowering *getTLI() const { return TLI; }
52 TargetTriple(TM->getTargetTriple()) {}
5853
5954 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
6055 TTI::UnrollingPreferences &UP);
122117 public:
123118 explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
124119 : BaseT(TM, F.getParent()->getDataLayout()),
125 ST(TM->getSubtargetImpl(F)),
120 ST(static_cast<const AMDGPUSubtarget *>(TM->getSubtargetImpl(F))),
126121 TLI(ST->getTargetLowering()),
127122 CommonTTI(TM, F),
128123 IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
210205
211206 friend BaseT;
212207
213 const AMDGPUSubtarget *ST;
208 const R600Subtarget *ST;
214209 const AMDGPUTargetLowering *TLI;
215210 AMDGPUTTIImpl CommonTTI;
216211
217212 public:
218213 explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
219214 : BaseT(TM, F.getParent()->getDataLayout()),
220 ST(TM->getSubtargetImpl(F)),
215 ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
221216 TLI(ST->getTargetLowering()),
222217 CommonTTI(TM, F) {}
223218
224 const AMDGPUSubtarget *getST() const { return ST; }
219 const R600Subtarget *getST() const { return ST; }
225220 const AMDGPUTargetLowering *getTLI() const { return TLI; }
226221
227222 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
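Note that AMDGPUTTIImpl no longer holds a subtarget at all: a Triple is enough to recover the address-space numbering through AMDGPU::getAMDGPUAS(TargetTriple). A toy version of deriving such a table from the arch alone (the constants are placeholders, not the real AMDGPUAS values):

#include <cstdio>
#include <string>

struct AddrSpaces { unsigned Private, Local, Global; };

// Illustrative only: pick an address-space table from the arch name the
// way the pass now derives AMDGPUAS from the target triple instead of
// threading a subtarget through.
AddrSpaces getASFromArch(const std::string &Arch) {
  if (Arch == "r600")
    return {0, 3, 1};   // placeholder numbering
  return {5, 3, 1};     // placeholder numbering for amdgcn
}

int main() {
  AddrSpaces AS = getASFromArch("amdgcn");
  std::printf("private=%u local=%u global=%u\n", AS.Private, AS.Local, AS.Global);
}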
431431 for (;; --I) {
432432 if (I == MBB.end())
433433 continue;
434 if (I->getOpcode() == AMDGPU::PRED_X) {
434 if (I->getOpcode() == R600::PRED_X) {
435435 switch (I->getOperand(2).getImm()) {
436 case AMDGPU::PRED_SETE_INT:
437 I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
436 case R600::PRED_SETE_INT:
437 I->getOperand(2).setImm(R600::PRED_SETNE_INT);
438438 return;
439 case AMDGPU::PRED_SETNE_INT:
440 I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
439 case R600::PRED_SETNE_INT:
440 I->getOperand(2).setImm(R600::PRED_SETE_INT);
441441 return;
442 case AMDGPU::PRED_SETE:
443 I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
442 case R600::PRED_SETE:
443 I->getOperand(2).setImm(R600::PRED_SETNE);
444444 return;
445 case AMDGPU::PRED_SETNE:
446 I->getOperand(2).setImm(AMDGPU::PRED_SETE);
445 case R600::PRED_SETNE:
446 I->getOperand(2).setImm(R600::PRED_SETE);
447447 return;
448448 default:
449449 llvm_unreachable("PRED_X Opcode invalid!");
512512
513513 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
514514 switch(OldOpcode) {
515 case AMDGPU::JUMP_COND:
516 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
517 case AMDGPU::BRANCH_COND_i32:
518 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
515 case R600::JUMP_COND:
516 case R600::JUMP: return R600::IF_PREDICATE_SET;
517 case R600::BRANCH_COND_i32:
518 case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
519519 default: llvm_unreachable("internal error");
520520 }
521521 return -1;
523523
524524 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
525525 switch(OldOpcode) {
526 case AMDGPU::JUMP_COND:
527 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
528 case AMDGPU::BRANCH_COND_i32:
529 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
526 case R600::JUMP_COND:
527 case R600::JUMP: return R600::IF_PREDICATE_SET;
528 case R600::BRANCH_COND_i32:
529 case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
530530 default: llvm_unreachable("internal error");
531531 }
532532 return -1;
534534
535535 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
536536 switch(OldOpcode) {
537 case AMDGPU::JUMP_COND:
538 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
537 case R600::JUMP_COND:
538 case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
539539 default: llvm_unreachable("internal error");
540540 }
541541 return -1;
543543
544544 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
545545 switch(OldOpcode) {
546 case AMDGPU::JUMP_COND:
547 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
546 case R600::JUMP_COND:
547 case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
548548 default: llvm_unreachable("internal error");
549549 }
550550 return -1;
572572
573573 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
574574 switch (MI->getOpcode()) {
575 case AMDGPU::JUMP_COND:
576 case AMDGPU::BRANCH_COND_i32:
577 case AMDGPU::BRANCH_COND_f32: return true;
575 case R600::JUMP_COND:
576 case R600::BRANCH_COND_i32:
577 case R600::BRANCH_COND_f32: return true;
578578 default:
579579 return false;
580580 }
583583
584584 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
585585 switch (MI->getOpcode()) {
586 case AMDGPU::JUMP:
587 case AMDGPU::BRANCH:
586 case R600::JUMP:
587 case R600::BRANCH:
588588 return true;
589589 default:
590590 return false;
633633 MachineBasicBlock::reverse_iterator It = MBB->rbegin();
634634 if (It != MBB->rend()) {
635635 MachineInstr *instr = &(*It);
636 if (instr->getOpcode() == AMDGPU::RETURN)
636 if (instr->getOpcode() == R600::RETURN)
637637 return instr;
638638 }
639639 return nullptr;
686686 MachineBasicBlock::iterator E = MBB->end();
687687 MachineBasicBlock::iterator It = Pre;
688688 while (It != E) {
689 if (Pre->getOpcode() == AMDGPU::CONTINUE
690 && It->getOpcode() == AMDGPU::ENDLOOP)
689 if (Pre->getOpcode() == R600::CONTINUE
690 && It->getOpcode() == R600::ENDLOOP)
691691 ContInstr.push_back(&*Pre);
692692 Pre = It;
693693 ++It;
13021302
13031303 bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
13041304
1305 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1306 MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
1305 //insert R600::ENDIF to avoid special case "input landBlk == NULL"
1306 MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
13071307
13081308 if (LandBlkHasOtherPred) {
13091309 report_fatal_error("Extra register needed to handle CFG");
13101310 unsigned CmpResReg =
13111311 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
13121312 report_fatal_error("Extra compare instruction needed to handle CFG");
1313 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
1313 insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
13141314 CmpResReg, DebugLoc());
13151315 }
13161316
13181318 // cause an assertion failure in the PostRA scheduling pass.
13191319 unsigned InitReg =
13201320 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1321 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
1321 insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
13221322 DebugLoc());
13231323
13241324 if (MigrateTrue) {
13281328 // (initVal != 1).
13291329 report_fatal_error("Extra register needed to handle CFG");
13301330 }
1331 insertInstrBefore(I, AMDGPU::ELSE);
1331 insertInstrBefore(I, R600::ELSE);
13321332
13331333 if (MigrateFalse) {
13341334 migrateInstruction(FalseMBB, LandBlk, I);
13401340
13411341 if (LandBlkHasOtherPred) {
13421342 // add endif
1343 insertInstrBefore(I, AMDGPU::ENDIF);
1343 insertInstrBefore(I, R600::ENDIF);
13441344
13451345 // put initReg = 2 to other predecessors of landBlk
13461346 for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
14131413 }
14141414
14151415 if (FalseMBB) {
1416 insertInstrBefore(I, AMDGPU::ELSE);
1416 insertInstrBefore(I, R600::ELSE);
14171417 MBB->splice(I, FalseMBB, FalseMBB->begin(),
14181418 FalseMBB->end());
14191419 MBB->removeSuccessor(FalseMBB, true);
14221422 retireBlock(FalseMBB);
14231423 MLI->removeBlock(FalseMBB);
14241424 }
1425 insertInstrBefore(I, AMDGPU::ENDIF);
1425 insertInstrBefore(I, R600::ENDIF);
14261426
14271427 BranchMI->eraseFromParent();
14281428
14351435 LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
14361436 << " land = BB" << LandMBB->getNumber() << "\n";);
14371437
1438 insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
1439 insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
1438 insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
1439 insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
14401440 DstBlk->replaceSuccessor(DstBlk, LandMBB);
14411441 }
14421442
14521452 MachineBasicBlock::iterator I = BranchMI;
14531453 if (TrueBranch != LandMBB)
14541454 reversePredicateSetter(I, *I->getParent());
1455 insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
1456 insertInstrBefore(I, AMDGPU::BREAK);
1457 insertInstrBefore(I, AMDGPU::ENDIF);
1455 insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
1456 insertInstrBefore(I, R600::BREAK);
1457 insertInstrBefore(I, R600::ENDIF);
14581458 //now branchInst can be erased safely
14591459 BranchMI->eraseFromParent();
14601460 //now take care of successors, retire blocks
14831483 getBranchZeroOpcode(OldOpcode);
14841484 insertCondBranchBefore(I, BranchOpcode, DL);
14851485 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1486 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1487 insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1486 insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
1487 insertInstrEnd(ContingMBB, R600::ENDIF, DL);
14881488 } else {
14891489 int BranchOpcode =
14901490 TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
14991499 // location we've just inserted that reference here so it should be
15001500 // representative insertEnd to ensure phi-moves, if exist, go before the
15011501 // continue-instr.
1502 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1502 insertInstrEnd(ContingMBB, R600::CONTINUE,
15031503 getLastDebugLocInBB(ContingMBB));
15041504 }
15051505 }
16261626 SmallVectorImpl<MachineBasicBlock *> &RetMBB) {
16271627 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
16281628 FuncRep->push_back(DummyExitBlk); //insert to function
1629 insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
1629 insertInstrEnd(DummyExitBlk, R600::RETURN);
16301630
16311631 for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
16321632 E = RetMBB.end(); It != E; ++It) {
33 tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
44 tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
55 tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
6 tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
76 tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
87 tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
98 tablegen(LLVM AMDGPUGenIntrinsicEnums.inc -gen-tgt-intrinsic-enums)
1716
1817 set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td)
1918 tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel)
19
20 set(LLVM_TARGET_DEFINITIONS R600.td)
21 tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer)
22 tablegen(LLVM R600GenCallingConv.inc -gen-callingconv)
23 tablegen(LLVM R600GenDAGISel.inc -gen-dag-isel)
24 tablegen(LLVM R600GenDFAPacketizer.inc -gen-dfa-packetizer)
25 tablegen(LLVM R600GenInstrInfo.inc -gen-instr-info)
26 tablegen(LLVM R600GenMCCodeEmitter.inc -gen-emitter)
27 tablegen(LLVM R600GenRegisterInfo.inc -gen-register-info)
28 tablegen(LLVM R600GenSubtargetInfo.inc -gen-subtarget)
2029
2130 add_public_tablegen_target(AMDGPUCommonTableGen)
2231
1919 #include "Disassembler/AMDGPUDisassembler.h"
2020 #include "AMDGPU.h"
2121 #include "AMDGPURegisterInfo.h"
22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
2223 #include "SIDefines.h"
2324 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
2425 #include "Utils/AMDGPUBaseInfo.h"
1313 //===----------------------------------------------------------------------===//
1414
1515 def isEG : Predicate<
16 "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
17 "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && "
16 "Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && "
1817 "!Subtarget->hasCaymanISA()"
1918 >;
2019
2120 def isEGorCayman : Predicate<
22 "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
23 "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS"
21 "Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||"
22 "Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS"
2423 >;
2524
2625 class EGPat : AMDGPUPat {
509509 void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
510510 const MCSubtargetInfo &STI,
511511 raw_ostream &O) {
512 if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) {
513 static_cast<R600InstPrinter*>(this)->printOperand(MI, OpNo, O);
514 return;
515 }
516
517512 if (OpNo >= MI->getNumOperands()) {
518513 O << "/*Missing OP" << OpNo << "*/";
519514 return;
964959 void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
965960 const MCSubtargetInfo &STI,
966961 raw_ostream &O) {
967 if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) {
968 static_cast<R600InstPrinter*>(this)->printMemOperand(MI, OpNo, O);
969 return;
970 }
971
972962 printOperand(MI, OpNo, STI, O);
973963 O << ", ";
974964 printOperand(MI, OpNo + 1, STI, O);
992982 assert(Op.isImm());
993983 if (Op.getImm() == 1)
994984 O << Asm;
995 }
996
997 void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
998 const MCSubtargetInfo &STI, raw_ostream &O) {
999 static_cast<R600InstPrinter*>(this)->printAbs(MI, OpNo, O);
1000 }
1001
1002 void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
1003 const MCSubtargetInfo &STI, raw_ostream &O) {
1004 static_cast<R600InstPrinter*>(this)->printClamp(MI, OpNo, O);
1005985 }
1006986
1007987 void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
10281008 O << " mul:4";
10291009 else if (Imm == SIOutMods::DIV2)
10301010 O << " div:2";
1031 }
1032
1033 void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
1034 const MCSubtargetInfo &STI,
1035 raw_ostream &O) {
1036 static_cast<R600InstPrinter*>(this)->printLiteral(MI, OpNo, O);
1037 }
1038
1039 void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
1040 const MCSubtargetInfo &STI, raw_ostream &O) {
1041 static_cast<R600InstPrinter*>(this)->printLast(MI, OpNo, O);
1042 }
1043
1044 void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
1045 const MCSubtargetInfo &STI, raw_ostream &O) {
1046 static_cast<R600InstPrinter*>(this)->printNeg(MI, OpNo, O);
1047 }
1048
1049 void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
1050 const MCSubtargetInfo &STI, raw_ostream &O) {
1051 static_cast<R600InstPrinter*>(this)->printOMOD(MI, OpNo, O);
1052 }
1053
1054 void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
1055 const MCSubtargetInfo &STI, raw_ostream &O) {
1056 static_cast<R600InstPrinter*>(this)->printRel(MI, OpNo, O);
1057 }
1058
1059 void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
1060 const MCSubtargetInfo &STI,
1061 raw_ostream &O) {
1062 static_cast<R600InstPrinter*>(this)->printUpdateExecMask(MI, OpNo, O);
1063 }
1064
1065 void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
1066 const MCSubtargetInfo &STI,
1067 raw_ostream &O) {
1068 static_cast<R600InstPrinter*>(this)->printUpdatePred(MI, OpNo, O);
1069 }
1070
1071 void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
1072 const MCSubtargetInfo &STI, raw_ostream &O) {
1073 static_cast<R600InstPrinter*>(this)->printWrite(MI, OpNo, O);
1074 }
1075
1076 void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
1077 const MCSubtargetInfo &STI,
1078 raw_ostream &O) {
1079 static_cast<R600InstPrinter*>(this)->printBankSwizzle(MI, OpNo, O);
1080 }
1081
1082 void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
1083 const MCSubtargetInfo &STI, raw_ostream &O) {
1084 static_cast<R600InstPrinter*>(this)->printRSel(MI, OpNo, O);
1085 }
1086
1087 void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
1088 const MCSubtargetInfo &STI, raw_ostream &O) {
1089 static_cast<R600InstPrinter*>(this)->printCT(MI, OpNo, O);
1090 }
1091
1092 void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
1093 const MCSubtargetInfo &STI, raw_ostream &O) {
1094 static_cast<R600InstPrinter*>(this)->printKCache(MI, OpNo, O);
10951011 }
10961012
10971013 void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
12981214
12991215 #include "AMDGPUGenAsmWriter.inc"
13001216
1217 void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
1218 StringRef Annot, const MCSubtargetInfo &STI) {
1219 O.flush();
1220 printInstruction(MI, O);
1221 printAnnotation(O, Annot);
1222 }
1223
13011224 void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
13021225 raw_ostream &O) {
13031226 AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|');
14161339 if (Op.isReg()) {
14171340 switch (Op.getReg()) {
14181341 // This is the default predicate state, so we don't need to print it.
1419 case AMDGPU::PRED_SEL_OFF:
1342 case R600::PRED_SEL_OFF:
14201343 break;
14211344
14221345 default:
14921415 O << " (MASKED)";
14931416 }
14941417 }
1418
1419 #include "R600GenAsmWriter.inc"
217217 raw_ostream &O);
218218 };
219219
220 // FIXME: R600 specific parts of AMDGPUInstrPrinter should be moved here, and
221 // MCTargetDesc should be using R600InstPrinter for the R600 target.
222 class R600InstPrinter : public AMDGPUInstPrinter {
220 class R600InstPrinter : public MCInstPrinter {
223221 public:
224222 R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
225223 const MCRegisterInfo &MRI)
226 : AMDGPUInstPrinter(MAI, MII, MRI) {}
224 : MCInstPrinter(MAI, MII, MRI) {}
225
226 void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
227 const MCSubtargetInfo &STI) override;
228 void printInstruction(const MCInst *MI, raw_ostream &O);
229 static const char *getRegisterName(unsigned RegNo);
227230
228231 void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
229232 void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
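R600InstPrinter now sits directly on MCInstPrinter instead of deriving from AMDGPUInstPrinter, and shared formatting like printIfSet is reached through a static helper rather than a cross-cast. A reduced sketch of siblings sharing a static helper (illustrative classes, not the MC API):

#include <cstdio>

struct PrinterBase { virtual ~PrinterBase() = default; };

struct GCNPrinter : PrinterBase {
  // Static so sibling printers can reuse it without inheriting.
  static void printIfSet(bool Flag, char Mark) {
    if (Flag) std::printf("%c", Mark);
  }
};

struct R600Printer : PrinterBase {     // no longer derives from GCNPrinter
  void printAbs(bool Abs) { GCNPrinter::printIfSet(Abs, '|'); }
};

int main() {
  R600Printer P;
  P.printAbs(true);
  std::printf("\n");
}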
3737 #define GET_SUBTARGETINFO_MC_DESC
3838 #include "AMDGPUGenSubtargetInfo.inc"
3939
40 #define NoSchedModel NoSchedModelR600
41 #define GET_SUBTARGETINFO_MC_DESC
42 #include "R600GenSubtargetInfo.inc"
43 #undef NoSchedModelR600
44
4045 #define GET_REGINFO_MC_DESC
4146 #include "AMDGPUGenRegisterInfo.inc"
47
48 #define GET_REGINFO_MC_DESC
49 #include "R600GenRegisterInfo.inc"
4250
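The NoSchedModel dance a few lines up exists, presumably, because both generated subtarget files emit a symbol of that name; renaming the R600 copy with the preprocessor before its inclusion avoids a duplicate definition in one translation unit. A self-contained illustration of the trick:

#include <cstdio>

// Plays the role of the symbol emitted by AMDGPUGenSubtargetInfo.inc.
static const int NoSchedModel = 0;

// Rename the identically-named symbol the "R600" table would emit.
#define NoSchedModel NoSchedModelR600
static const int NoSchedModel = 1;   // actually defines NoSchedModelR600
#undef NoSchedModel

int main() {
  std::printf("gcn=%d r600=%d\n", NoSchedModel, NoSchedModelR600);
}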
4351 static MCInstrInfo *createAMDGPUMCInstrInfo() {
4452 MCInstrInfo *X = new MCInstrInfo();
4856
4957 static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) {
5058 MCRegisterInfo *X = new MCRegisterInfo();
51 InitAMDGPUMCRegisterInfo(X, 0);
59 if (TT.getArch() == Triple::r600)
60 InitR600MCRegisterInfo(X, 0);
61 else
62 InitAMDGPUMCRegisterInfo(X, 0);
5263 return X;
5364 }
5465
5566 static MCSubtargetInfo *
5667 createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
68 if (TT.getArch() == Triple::r600)
69 return createR600MCSubtargetInfoImpl(TT, CPU, FS);
5770 return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS);
5871 }
5972
6275 const MCAsmInfo &MAI,
6376 const MCInstrInfo &MII,
6477 const MCRegisterInfo &MRI) {
65 return T.getArch() == Triple::r600 ? new R600InstPrinter(MAI, MII, MRI) :
66 new AMDGPUInstPrinter(MAI, MII, MRI);
78 if (T.getArch() == Triple::r600)
79 return new R600InstPrinter(MAI, MII, MRI);
80 else
81 return new AMDGPUInstPrinter(MAI, MII, MRI);
6782 }
6883
6984 static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S,
89104 }
90105
91106 extern "C" void LLVMInitializeAMDGPUTargetMC() {
107
108 TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo);
109 TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(), createR600MCInstrInfo);
92110 for (Target *T : {&getTheAMDGPUTarget(), &getTheGCNTarget()}) {
93111 RegisterMCAsmInfo X(*T);
94112
95 TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
96113 TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
97114 TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
98115 TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
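All the createAMDGPU* factories above now branch on Triple::r600, so one registration path serves both targets. A toy registry doing the same per-arch dispatch (the strings and factory names are made up):

#include <cstdio>
#include <functional>
#include <map>
#include <string>

using Factory = std::function<const char *()>;

int main() {
  // Register one factory per architecture, the way LLVMInitializeAMDGPUTargetMC
  // now registers createR600MCInstrInfo for r600 and the GCN variant for amdgcn.
  std::map<std::string, Factory> Registry;
  Registry["r600"]   = [] { return "R600MCInstrInfo"; };
  Registry["amdgcn"] = [] { return "AMDGPUMCInstrInfo"; };

  std::printf("%s\n", Registry.at("r600")());
}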
3939 MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
4040 const MCRegisterInfo &MRI,
4141 MCContext &Ctx);
42 MCInstrInfo *createR600MCInstrInfo();
4243
4344 MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
4445 const MCRegisterInfo &MRI,
5859 #include "AMDGPUGenRegisterInfo.inc"
5960 #undef GET_REGINFO_ENUM
6061
62 #define GET_REGINFO_ENUM
63 #include "R600GenRegisterInfo.inc"
64 #undef GET_REGINFO_ENUM
65
6166 #define GET_INSTRINFO_ENUM
6267 #define GET_INSTRINFO_OPERAND_ENUM
6368 #define GET_INSTRINFO_SCHED_ENUM
6671 #undef GET_INSTRINFO_OPERAND_ENUM
6772 #undef GET_INSTRINFO_ENUM
6873
74 #define GET_INSTRINFO_ENUM
75 #define GET_INSTRINFO_OPERAND_ENUM
76 #define GET_INSTRINFO_SCHED_ENUM
77 #include "R600GenInstrInfo.inc"
78 #undef GET_INSTRINFO_SCHED_ENUM
79 #undef GET_INSTRINFO_OPERAND_ENUM
80 #undef GET_INSTRINFO_ENUM
6981
7082 #define GET_SUBTARGETINFO_ENUM
7183 #include "AMDGPUGenSubtargetInfo.inc"
7284 #undef GET_SUBTARGETINFO_ENUM
7385
86 #define GET_SUBTARGETINFO_ENUM
87 #include "R600GenSubtargetInfo.inc"
88 #undef GET_SUBTARGETINFO_ENUM
89
7490 #endif
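The header now pulls in two parallel families of generated enums, each bracketed by GET_* guard macros so a client only materializes the sections it asks for. A two-file illustration of that convention (file names are made up):

// ---- gen_tables.inc (stands in for R600GenInstrInfo.inc) ----
#ifdef GET_INSTRINFO_ENUM
enum { R600_ADD = 0, R600_MUL = 1 };
#endif

// ---- client.cpp ----
#include <cstdio>

#define GET_INSTRINFO_ENUM
#include "gen_tables.inc"   // only the guarded section is emitted
#undef GET_INSTRINFO_ENUM

int main() { std::printf("%d\n", R600_MUL); }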
77 AMDGPUMCTargetDesc.cpp
88 AMDGPUTargetStreamer.cpp
99 R600MCCodeEmitter.cpp
10 R600MCTargetDesc.cpp
1011 SIMCCodeEmitter.cpp
1112 )
1414 //===----------------------------------------------------------------------===//
1515
1616 #include "MCTargetDesc/AMDGPUFixupKinds.h"
17 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
1817 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1918 #include "R600Defines.h"
2019 #include "llvm/MC/MCCodeEmitter.h"
3534
3635 namespace {
3736
38 class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
37 class R600MCCodeEmitter : public MCCodeEmitter {
3938 const MCRegisterInfo &MRI;
39 const MCInstrInfo &MCII;
4040
4141 public:
4242 R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
43 : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
43 : MRI(mri), MCII(mcii) {}
4444 R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
4545 R600MCCodeEmitter &operator=(const R600MCCodeEmitter &) = delete;
4646
4747 /// Encode the instruction and write it to the OS.
4848 void encodeInstruction(const MCInst &MI, raw_ostream &OS,
4949 SmallVectorImpl<MCFixup> &Fixups,
50 const MCSubtargetInfo &STI) const override;
50 const MCSubtargetInfo &STI) const;
5151
5252 /// \returns the encoding for an MCOperand.
5353 uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
5454 SmallVectorImpl<MCFixup> &Fixups,
55 const MCSubtargetInfo &STI) const override;
55 const MCSubtargetInfo &STI) const;
5656
5757 private:
58
5859 void Emit(uint32_t value, raw_ostream &OS) const;
5960 void Emit(uint64_t value, raw_ostream &OS) const;
6061
6162 unsigned getHWReg(unsigned regNo) const;
63
64 uint64_t getBinaryCodeForInstr(const MCInst &MI,
65 SmallVectorImpl<MCFixup> &Fixups,
66 const MCSubtargetInfo &STI) const;
67 uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
68 void verifyInstructionPredicates(const MCInst &MI,
69 uint64_t AvailableFeatures) const;
70
6271 };
6372
6473 } // end anonymous namespace
93102 computeAvailableFeatures(STI.getFeatureBits()));
94103
95104 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
96 if (MI.getOpcode() == AMDGPU::RETURN ||
97 MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
98 MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
99 MI.getOpcode() == AMDGPU::BUNDLE ||
100 MI.getOpcode() == AMDGPU::KILL) {
105 if (MI.getOpcode() == R600::RETURN ||
106 MI.getOpcode() == R600::FETCH_CLAUSE ||
107 MI.getOpcode() == R600::ALU_CLAUSE ||
108 MI.getOpcode() == R600::BUNDLE ||
109 MI.getOpcode() == R600::KILL) {
101110 return;
102111 } else if (IS_VTX(Desc)) {
103112 uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
104113 uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
105 if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) {
114 if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) {
106115 InstWord2 |= 1 << 19; // Mega-Fetch bit
107116 }
108117
135144 Emit((uint32_t) 0, OS);
136145 } else {
137146 uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
138 if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) &&
147 if ((STI.getFeatureBits()[R600::FeatureR600ALUInst]) &&
139148 ((Desc.TSFlags & R600_InstFlag::OP1) ||
140149 Desc.TSFlags & R600_InstFlag::OP2)) {
141150 uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
185194 }
186195
187196 #define ENABLE_INSTR_PREDICATE_VERIFIER
188 #include "AMDGPUGenMCCodeEmitter.inc"
197 #include "R600GenMCCodeEmitter.inc"
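For VTX clauses the emitter still ORs in the mega-fetch bit (bit 19 of the second word) on everything except Cayman, which dropped it. A minimal sketch of that word assembly, with a plain bool standing in for the feature-bit query:

#include <cstdint>
#include <cstdio>

// Mirrors the VTX path above: word 2 carries the fetch offset, plus the
// mega-fetch bit on pre-Cayman parts.
uint32_t buildVtxWord2(uint32_t Offset, bool IsCayman) {
  uint32_t Word2 = Offset;
  if (!IsCayman)
    Word2 |= 1u << 19;   // Mega-Fetch bit
  return Word2;
}

int main() {
  std::printf("0x%08x 0x%08x\n",
              buildVtxWord2(16, false), buildVtxWord2(16, true));
}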
0 //===-- R600MCTargetDesc.cpp - R600 Target Descriptions -------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This file provides R600 specific target descriptions.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUMCTargetDesc.h"
15 #include "llvm/MC/MCInstrInfo.h"
16
17 using namespace llvm;
18
19 #define GET_INSTRINFO_MC_DESC
20 #include "R600GenInstrInfo.inc"
21
22 MCInstrInfo *llvm::createR600MCInstrInfo() {
23 MCInstrInfo *X = new MCInstrInfo();
24 InitR600MCInstrInfo(X);
25 return X;
26 }
437437 llvm_unreachable("Encoding of this operand type is not supported yet.");
438438 return 0;
439439 }
440
441 #define ENABLE_INSTR_PREDICATE_VERIFIER
442 #include "AMDGPUGenMCCodeEmitter.inc"
0 //===-- R600.td - R600 Tablegen files ----------------------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 include "llvm/Target/Target.td"
10
11 def R600InstrInfo : InstrInfo {
12 let guessInstructionProperties = 1;
13 let noNamedPositionallyEncodedOperands = 1;
14 }
15
16 def R600 : Target {
17 let InstructionSet = R600InstrInfo;
18 let AllowRegisterRenaming = 1;
19 }
20
21 let Namespace = "R600" in {
22
23 foreach Index = 0-15 in {
24 def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
25 }
26
27 include "R600RegisterInfo.td"
28
29 }
30
31 def NullALU : InstrItinClass;
32 def ALU_NULL : FuncUnit;
33
34 include "AMDGPUFeatures.td"
35 include "R600Schedule.td"
36 include "R600Processors.td"
37 include "AMDGPUInstrInfo.td"
38 include "AMDGPUInstructions.td"
39 include "R600Instructions.td"
40 include "R700Instructions.td"
41 include "EvergreenInstructions.td"
42 include "CaymanInstructions.td"
43
44 // Calling convention for R600
45 def CC_R600 : CallingConv<[
46 CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
47 T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
48 T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
49 T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
50 T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
51 T30_XYZW, T31_XYZW, T32_XYZW
52 ]>>>
53 ]>;
54
55 // Calling convention for compute kernels
56 def CC_R600_Kernel : CallingConv<[
57 CCCustom<"allocateKernArg">
58 ]>;
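CC_R600 simply assigns each in-register vector argument to the next T*_XYZW register in order. A toy allocator showing that first-fit behavior (register names abbreviated, and the stack fallback is not modeled):

#include <cstdio>

// First-fit assignment over a fixed register file, like CCAssignToReg
// walking T0_XYZW..T32_XYZW.
int assignArg(bool Used[33]) {
  for (int i = 0; i < 33; ++i)
    if (!Used[i]) { Used[i] = true; return i; }
  return -1;   // out of registers: a real CC would spill to the stack
}

int main() {
  bool Used[33] = {};
  std::printf("arg0 -> T%d_XYZW\n", assignArg(Used));
  std::printf("arg1 -> T%d_XYZW\n", assignArg(Used));
}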
5050
5151 for (const MachineBasicBlock &MBB : MF) {
5252 for (const MachineInstr &MI : MBB) {
53 if (MI.getOpcode() == AMDGPU::KILLGT)
53 if (MI.getOpcode() == R600::KILLGT)
5454 killPixel = true;
5555 unsigned numOperands = MI.getNumOperands();
5656 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
3333
3434 static bool isCFAlu(const MachineInstr &MI) {
3535 switch (MI.getOpcode()) {
36 case AMDGPU::CF_ALU:
37 case AMDGPU::CF_ALU_PUSH_BEFORE:
36 case R600::CF_ALU:
37 case R600::CF_ALU_PUSH_BEFORE:
3838 return true;
3939 default:
4040 return false;
8484 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
8585 assert(isCFAlu(MI));
8686 return MI
87 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
87 .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT))
8888 .getImm();
8989 }
9090
9191 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
9292 assert(isCFAlu(MI));
9393 return MI
94 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
94 .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled))
9595 .getImm();
9696 }
9797
9898 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
9999 MachineInstr &CFAlu) const {
100 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
100 int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
101101 MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
102102 I++;
103103 do {
116116 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
117117 const MachineInstr &LatrCFAlu) const {
118118 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
119 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
119 int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
120120 unsigned RootInstCount = getCFAluSize(RootCFAlu),
121121 LaterInstCount = getCFAluSize(LatrCFAlu);
122122 unsigned CumuledInsts = RootInstCount + LaterInstCount;
124124 LLVM_DEBUG(dbgs() << "Excess inst counts\n");
125125 return false;
126126 }
127 if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
127 if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE)
128128 return false;
129129 // Is KCache Bank 0 compatible ?
130130 int Mode0Idx =
131 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
131 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0);
132132 int KBank0Idx =
133 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
133 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0);
134134 int KBank0LineIdx =
135 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
135 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0);
136136 if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
137137 RootCFAlu.getOperand(Mode0Idx).getImm() &&
138138 (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
144144 }
145145 // Is KCache Bank 1 compatible ?
146146 int Mode1Idx =
147 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
147 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1);
148148 int KBank1Idx =
149 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
149 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1);
150150 int KBank1LineIdx =
151 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
151 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1);
152152 if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
153153 RootCFAlu.getOperand(Mode1Idx).getImm() &&
154154 (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
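The merge test above walks paired operand indices for each kcache bank; condensed, the rule is that a bank blocks the merge only when both clauses enable it and disagree on where it points. A reduced sketch (struct and field names are made up):

#include <cstdio>

struct KCacheBank { int Mode, Bank, Addr; };

// Banks conflict only if both clauses actually use them (Mode != 0) and
// they point at different constant lines.
bool banksCompatible(const KCacheBank &A, const KCacheBank &B) {
  if (!A.Mode || !B.Mode)
    return true;                       // at most one side uses the bank
  return A.Bank == B.Bank && A.Addr == B.Addr;
}

int main() {
  KCacheBank X{1, 0, 4}, Y{1, 0, 5}, Z{0, 0, 0};
  std::printf("X/Y: %d, X/Z: %d\n", banksCompatible(X, Y), banksCompatible(X, Z));
}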
9393 }
9494
9595 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
96 if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
96 if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
9797 getLoopDepth() > 1)
9898 return true;
9999
102102
103103 switch(Opcode) {
104104 default: return false;
105 case AMDGPU::CF_ALU_PUSH_BEFORE:
106 case AMDGPU::CF_ALU_ELSE_AFTER:
107 case AMDGPU::CF_ALU_BREAK:
108 case AMDGPU::CF_ALU_CONTINUE:
105 case R600::CF_ALU_PUSH_BEFORE:
106 case R600::CF_ALU_ELSE_AFTER:
107 case R600::CF_ALU_BREAK:
108 case R600::CF_ALU_CONTINUE:
109109 if (CurrentSubEntries == 0)
110110 return false;
111111 if (ST->getWavefrontSize() == 64) {
167167 void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
168168 CFStack::StackItem Item = CFStack::ENTRY;
169169 switch(Opcode) {
170 case AMDGPU::CF_PUSH_EG:
171 case AMDGPU::CF_ALU_PUSH_BEFORE:
170 case R600::CF_PUSH_EG:
171 case R600::CF_ALU_PUSH_BEFORE:
172172 if (!isWQM) {
173173 if (!ST->hasCaymanISA() &&
174174 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
239239
240240 bool IsTrivialInst(MachineInstr &MI) const {
241241 switch (MI.getOpcode()) {
242 case AMDGPU::KILL:
243 case AMDGPU::RETURN:
242 case R600::KILL:
243 case R600::RETURN:
244244 return true;
245245 default:
246246 return false;
252252 bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
253253 switch (CFI) {
254254 case CF_TC:
255 Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
255 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
256256 break;
257257 case CF_VC:
258 Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
258 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
259259 break;
260260 case CF_CALL_FS:
261 Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
261 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
262262 break;
263263 case CF_WHILE_LOOP:
264 Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
264 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
265265 break;
266266 case CF_END_LOOP:
267 Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
267 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
268268 break;
269269 case CF_LOOP_BREAK:
270 Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
270 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
271271 break;
272272 case CF_LOOP_CONTINUE:
273 Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
273 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
274274 break;
275275 case CF_JUMP:
276 Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
276 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
277277 break;
278278 case CF_ELSE:
279 Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
279 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
280280 break;
281281 case CF_POP:
282 Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
282 Opcode = isEg ? R600::POP_EG : R600::POP_R600;
283283 break;
284284 case CF_END:
285285 if (ST->hasCaymanISA()) {
286 Opcode = AMDGPU::CF_END_CM;
286 Opcode = R600::CF_END_CM;
287287 break;
288288 }
289 Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
289 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
290290 break;
291291 }
292292 assert (Opcode && "No opcode selected");
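getHWInstrDesc's switch pairs every control-flow kind with an EG and an R600 encoding, with Cayman only diverging for CF_END. The same selection can be phrased as a table; a sketch with string stand-ins for the opcodes (no MCInstrDesc involved):

#include <cstdio>

enum CFI { CF_TC, CF_VC, CF_JUMP, CF_ELSE, CF_POP, CF_END, NUM_CFI };

struct OpPair { const char *EG, *R600; };

// One row per control-flow kind, mirroring the isEg ? EG : R600 selection.
static const OpPair Table[NUM_CFI] = {
  {"CF_TC_EG",   "CF_TC_R600"},
  {"CF_VC_EG",   "CF_VC_R600"},
  {"CF_JUMP_EG", "CF_JUMP_R600"},
  {"CF_ELSE_EG", "CF_ELSE_R600"},
  {"POP_EG",     "POP_R600"},
  {"CF_END_EG",  "CF_END_R600"},
};

const char *select(CFI K, bool IsEg) {
  return IsEg ? Table[K].EG : Table[K].R600;
}

int main() { std::printf("%s\n", select(CF_JUMP, /*IsEg=*/true)); }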
304304 continue;
305305 if (MO.isDef()) {
306306 unsigned Reg = MO.getReg();
307 if (AMDGPU::R600_Reg128RegClass.contains(Reg))
307 if (R600::R600_Reg128RegClass.contains(Reg))
308308 DstMI = Reg;
309309 else
310310 DstMI = TRI->getMatchingSuperReg(Reg,
311311 AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
312 &AMDGPU::R600_Reg128RegClass);
312 &R600::R600_Reg128RegClass);
313313 }
314314 if (MO.isUse()) {
315315 unsigned Reg = MO.getReg();
316 if (AMDGPU::R600_Reg128RegClass.contains(Reg))
316 if (R600::R600_Reg128RegClass.contains(Reg))
317317 SrcMI = Reg;
318318 else
319319 SrcMI = TRI->getMatchingSuperReg(Reg,
320320 AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
321 &AMDGPU::R600_Reg128RegClass);
321 &R600::R600_Reg128RegClass);
322322 }
323323 }
324324 if ((DstRegs.find(SrcMI) == DstRegs.end())) {
358358
359359 void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
360360 static const unsigned LiteralRegs[] = {
361 AMDGPU::ALU_LITERAL_X,
362 AMDGPU::ALU_LITERAL_Y,
363 AMDGPU::ALU_LITERAL_Z,
364 AMDGPU::ALU_LITERAL_W
361 R600::ALU_LITERAL_X,
362 R600::ALU_LITERAL_Y,
363 R600::ALU_LITERAL_Z,
364 R600::ALU_LITERAL_W
365365 };
366366 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
367367 TII->getSrcs(MI);
368368 for (const auto &Src:Srcs) {
369 if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X)
369 if (Src.first->getReg() != R600::ALU_LITERAL_X)
370370 continue;
371371 int64_t Imm = Src.second;
372372 std::vector<MachineOperand *>::iterator It =
376376
377377 // Get corresponding Operand
378378 MachineOperand &Operand = MI.getOperand(
379 TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
379 TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
380380
381381 if (It != Lits.end()) {
382382 // Reuse existing literal reg
399399 unsigned LiteralPair0 = Literals[i];
400400 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
401401 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
402 TII->get(AMDGPU::LITERALS))
402 TII->get(R600::LITERALS))
403403 .addImm(LiteralPair0)
404404 .addImm(LiteralPair1);
405405 }
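Literals are emitted two per LITERALS bundle, with a zero pad when the count is odd (the `(i + 1 < e) ? ... : 0` above). A standalone sketch of the pairing loop:

#include <cstdio>
#include <vector>

// Pack literal values in pairs, padding an odd tail with 0, as the
// LITERALS emission above does.
void emitLiteralPairs(const std::vector<unsigned> &Lits) {
  for (size_t i = 0, e = Lits.size(); i < e; i += 2) {
    unsigned First  = Lits[i];
    unsigned Second = (i + 1 < e) ? Lits[i + 1] : 0;   // pad odd count
    std::printf("LITERALS %u %u\n", First, Second);
  }
}

int main() {
  emitLiteralPairs({7, 9, 11});   // last pair padded with 0
}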
441441 }
442442 for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
443443 MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
444 TII->get(AMDGPU::LITERALS));
444 TII->get(R600::LITERALS));
445445 if (Literals[i]->isImm()) {
446446 MILit.addImm(Literals[i]->getImm());
447447 } else {
470470 unsigned &CfCount) {
471471 CounterPropagateAddr(*Clause.first, CfCount);
472472 MachineBasicBlock *BB = Clause.first->getParent();
473 BuildMI(BB, DL, TII->get(AMDGPU::FETCH_CLAUSE)).addImm(CfCount);
473 BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
474474 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
475475 BB->splice(InsertPos, BB, Clause.second[i]);
476476 }
482482 Clause.first->getOperand(0).setImm(0);
483483 CounterPropagateAddr(*Clause.first, CfCount);
484484 MachineBasicBlock *BB = Clause.first->getParent();
485 BuildMI(BB, DL, TII->get(AMDGPU::ALU_CLAUSE)).addImm(CfCount);
485 BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
486486 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
487487 BB->splice(InsertPos, BB, Clause.second[i]);
488488 }
539539 }
540540
541541 MachineBasicBlock::iterator MI = I;
542 if (MI->getOpcode() != AMDGPU::ENDIF)
542 if (MI->getOpcode() != R600::ENDIF)
543543 LastAlu.back() = nullptr;
544 if (MI->getOpcode() == AMDGPU::CF_ALU)
544 if (MI->getOpcode() == R600::CF_ALU)
545545 LastAlu.back() = &*MI;
546546 I++;
547547 bool RequiresWorkAround =
548548 CFStack.requiresWorkAroundForInst(MI->getOpcode());
549549 switch (MI->getOpcode()) {
550 case AMDGPU::CF_ALU_PUSH_BEFORE:
550 case R600::CF_ALU_PUSH_BEFORE:
551551 if (RequiresWorkAround) {
552552 LLVM_DEBUG(dbgs()
553553 << "Applying bug work-around for ALU_PUSH_BEFORE\n");
554 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
554 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
555555 .addImm(CfCount + 1)
556556 .addImm(1);
557 MI->setDesc(TII->get(AMDGPU::CF_ALU));
557 MI->setDesc(TII->get(R600::CF_ALU));
558558 CfCount++;
559 CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
559 CFStack.pushBranch(R600::CF_PUSH_EG);
560560 } else
561 CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
561 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
562562 LLVM_FALLTHROUGH;
563 case AMDGPU::CF_ALU:
563 case R600::CF_ALU:
564564 I = MI;
565565 AluClauses.push_back(MakeALUClause(MBB, I));
566566 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
567567 CfCount++;
568568 break;
569 case AMDGPU::WHILELOOP: {
569 case R600::WHILELOOP: {
570570 CFStack.pushLoop();
571571 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
572572 getHWInstrDesc(CF_WHILE_LOOP))
579579 CfCount++;
580580 break;
581581 }
582 case AMDGPU::ENDLOOP: {
582 case R600::ENDLOOP: {
583583 CFStack.popLoop();
584584 std::pair<unsigned, std::set<MachineInstr *>> Pair =
585585 std::move(LoopStack.back());
591591 CfCount++;
592592 break;
593593 }
594 case AMDGPU::IF_PREDICATE_SET: {
594 case R600::IF_PREDICATE_SET: {