llvm.org GIT mirror: llvm / f98f2ce

Add R600 backend

A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169915 91177308-0d34-0410-b5e6-96231b3b80d8
Tom Stellard, 7 years ago
146 changed file(s) with 20232 addition(s) and 1 deletion(s).
471 471 include "llvm/IntrinsicsHexagon.td"
472 472 include "llvm/IntrinsicsNVVM.td"
473 473 include "llvm/IntrinsicsMips.td"
474 include "llvm/IntrinsicsR600.td"
0 //===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines all of the R600-specific intrinsics.
10 //
11 //===----------------------------------------------------------------------===//
12
13 let TargetPrefix = "r600" in {
14
15   class R600ReadPreloadRegisterIntrinsic <string name>
16     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
17       GCCBuiltin<name>;
18
19   multiclass R600ReadPreloadRegisterIntrinsic_xyz <string prefix> {
20     def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
21     def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
22     def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
23   }
24
25 defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
26 "__builtin_r600_read_global_size">;
27 defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
28 "__builtin_r600_read_local_size">;
29 defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
30 "__builtin_r600_read_ngroups">;
31 defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
32 "__builtin_r600_read_tgid">;
33 defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
34 "__builtin_r600_read_tidig">;
35 } // End TargetPrefix = "r600"
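Front ends reach these definitions through the TableGen-generated Intrinsic::r600_* enumerators (derived from the int_r600_* names above). A minimal sketch of emitting one from C++, assuming 2012-era include paths and a standard Module/IRBuilder setup:

#include "llvm/IRBuilder.h"   // later moved to llvm/IR/IRBuilder.h
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"

using namespace llvm;

// Emit a call to llvm.r600.read.tgid.x, declared by int_r600_read_tgid above.
static Value *emitReadTgidX(Module &M, IRBuilder<> &Builder) {
  Function *F = Intrinsic::getDeclaration(&M, Intrinsic::r600_read_tgid_x);
  return Builder.CreateCall(F);
}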
15 15 ;===------------------------------------------------------------------------===;
16 16
17 17 [common]
18 subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
18 subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
19 19
20 20 ; This is a special group whose required libraries are extended (by llvm-build)
21 21 ; with the best execution engine (the native JIT, if available, or the
0 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9
10 #ifndef AMDGPU_H
11 #define AMDGPU_H
12
13 #include "AMDGPUTargetMachine.h"
14 #include "llvm/Support/TargetRegistry.h"
15 #include "llvm/Target/TargetMachine.h"
16
17 namespace llvm {
18
19 class FunctionPass;
20 class AMDGPUTargetMachine;
21
22 // R600 Passes
23 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
24 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
25
26 // SI Passes
27 FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
28 FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
29 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
30 FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
31 FunctionPass *createSIFixSGPRLivenessPass(TargetMachine &tm);
32
33 // Passes common to R600 and SI
34 FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
35
36 } // End namespace llvm
37
38 namespace ShaderType {
39 enum Type {
40 PIXEL = 0,
41 VERTEX = 1,
42 GEOMETRY = 2,
43 COMPUTE = 3
44 };
45 }
46
47 #endif // AMDGPU_H
0 //===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8
9 // Include AMDIL TD files
10 include "AMDILBase.td"
11
12
13 def AMDGPUInstrInfo : InstrInfo {
14 let guessInstructionProperties = 1;
15 }
16
17 //===----------------------------------------------------------------------===//
18 // Declare the target which we are implementing
19 //===----------------------------------------------------------------------===//
20 def AMDGPUAsmWriter : AsmWriter {
21 string AsmWriterClassName = "InstPrinter";
22 int Variant = 0;
23 bit isMCAsmWriter = 1;
24 }
25
26 def AMDGPU : Target {
27 // Pull in Instruction Info:
28 let InstructionSet = AMDGPUInstrInfo;
29 let AssemblyWriters = [AMDGPUAsmWriter];
30 }
31
32 // Include AMDGPU TD files
33 include "R600Schedule.td"
34 include "SISchedule.td"
35 include "Processors.td"
36 include "AMDGPUInstrInfo.td"
37 include "AMDGPUIntrinsics.td"
38 include "AMDGPURegisterInfo.td"
39 include "AMDGPUInstructions.td"
0 //===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
12 /// code. When passed an MCAsmStreamer it prints assembly and when passed
13 /// an MCObjectStreamer it outputs binary code.
14 //
15 //===----------------------------------------------------------------------===//
16 //
17
18
19 #include "AMDGPUAsmPrinter.h"
20 #include "AMDGPU.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "SIRegisterInfo.h"
23 #include "llvm/MC/MCStreamer.h"
24 #include "llvm/Target/TargetLoweringObjectFile.h"
25 #include "llvm/Support/TargetRegistry.h"
26
27 using namespace llvm;
28
29
30 static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
31 MCStreamer &Streamer) {
32 return new AMDGPUAsmPrinter(tm, Streamer);
33 }
34
35 extern "C" void LLVMInitializeR600AsmPrinter() {
36 TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
37 }
38
39 /// We need to override this function so we can avoid
40 /// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
41 bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
42   const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
43 if (STM.dumpCode()) {
44 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
45 MF.dump();
46 #endif
47 }
48 SetupMachineFunction(MF);
49 OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
50 if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
51 EmitProgramInfo(MF);
52 }
53 EmitFunctionBody();
54 return false;
55 }
56
57 void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
58 unsigned MaxSGPR = 0;
59 unsigned MaxVGPR = 0;
60 bool VCCUsed = false;
61   const SIRegisterInfo * RI =
62         static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
63
64 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
65 BB != BB_E; ++BB) {
66 MachineBasicBlock &MBB = *BB;
67 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
68 I != E; ++I) {
69 MachineInstr &MI = *I;
70
71 unsigned numOperands = MI.getNumOperands();
72 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
73 MachineOperand & MO = MI.getOperand(op_idx);
74 unsigned maxUsed;
75 unsigned width = 0;
76 bool isSGPR = false;
77 unsigned reg;
78 unsigned hwReg;
79 if (!MO.isReg()) {
80 continue;
81 }
82 reg = MO.getReg();
83 if (reg == AMDGPU::VCC) {
84 VCCUsed = true;
85 continue;
86 }
87 switch (reg) {
88 default: break;
89 case AMDGPU::EXEC:
90 case AMDGPU::SI_LITERAL_CONSTANT:
91 case AMDGPU::SREG_LIT_0:
92 case AMDGPU::M0:
93 continue;
94 }
95
96 if (AMDGPU::SReg_32RegClass.contains(reg)) {
97 isSGPR = true;
98 width = 1;
99 } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
100 isSGPR = false;
101 width = 1;
102 } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
103 isSGPR = true;
104 width = 2;
105 } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
106 isSGPR = false;
107 width = 2;
108 } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
109 isSGPR = true;
110 width = 4;
111 } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
112 isSGPR = false;
113 width = 4;
114 } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
115 isSGPR = true;
116 width = 8;
117 } else {
118 assert(!"Unknown register class");
119 }
120 hwReg = RI->getEncodingValue(reg);
121 maxUsed = hwReg + width - 1;
122 if (isSGPR) {
123 MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
124 } else {
125 MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
126 }
127 }
128 }
129 }
130 if (VCCUsed) {
131 MaxSGPR += 2;
132 }
133   SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
134 OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
135 OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
136 OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
137 }
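The three 32-bit words emitted above form a small fixed-layout header ahead of the shader binary. A hedged sketch of what a consumer would read back (the struct and field names are illustrative, not a published ABI; EmitIntValue writes target-endian, i.e. little-endian here):

#include <cstdint>
#include <cstring>

// Mirror of the words written by EmitProgramInfo, in order.
struct ProgramInfoWords {
  uint32_t NumSGPRs;       // MaxSGPR + 1 (includes +2 when VCC is used)
  uint32_t NumVGPRs;       // MaxVGPR + 1
  uint32_t SPIPSInputAddr; // pixel shader input state
};

static ProgramInfoWords readProgramInfo(const uint8_t *Blob) {
  ProgramInfoWords W;
  std::memcpy(&W, Blob, sizeof(W)); // assumes a little-endian host
  return W;
}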
0 //===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief AMDGPU Assembly printer class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef AMDGPU_ASMPRINTER_H
15 #define AMDGPU_ASMPRINTER_H
16
17 #include "llvm/CodeGen/AsmPrinter.h"
18
19 namespace llvm {
20
21 class AMDGPUAsmPrinter : public AsmPrinter {
22
23 public:
24 explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
25 : AsmPrinter(TM, Streamer) { }
26
27 virtual bool runOnMachineFunction(MachineFunction &MF);
28
29 virtual const char *getPassName() const {
30 return "AMDGPU Assembly Printer";
31 }
32
33 /// \brief Emit register usage information so that the GPU driver
34 /// can correctly setup the GPU state.
35 void EmitProgramInfo(MachineFunction &MF);
36
37 /// Implemented in AMDGPUMCInstLower.cpp
38 virtual void EmitInstruction(const MachineInstr *MI);
39 };
40
41 } // End namespace llvm
42
43 #endif //AMDGPU_ASMPRINTER_H
0 //===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief CodeEmitter interface for R600 and SI codegen.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef AMDGPUCODEEMITTER_H
15 #define AMDGPUCODEEMITTER_H
16
17 namespace llvm {
18
19 class AMDGPUCodeEmitter {
20 public:
21 uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
22 virtual uint64_t getMachineOpValue(const MachineInstr &MI,
23 const MachineOperand &MO) const { return 0; }
24 virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
25 unsigned OpNo) const {
26 return 0;
27 }
28 virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
29 unsigned OpNo) const {
30 return 0;
31 }
32 virtual uint64_t VOPPostEncode(const MachineInstr &MI,
33 uint64_t Value) const {
34 return Value;
35 }
36 virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
37 unsigned OpNo) const {
38 return 0;
39 }
40 virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
41 const {
42 return 0;
43 }
44 };
45
46 } // End namespace llvm
47
48 #endif // AMDGPUCODEEMITTER_H
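The inert defaults above give TableGen's generated getBinaryCodeForInstr() a stable set of callbacks; a concrete emitter overrides only the hooks its encoding uses. A hypothetical minimal subclass (illustrative only; the real overrides are in the R600 and SI emitters added elsewhere in this commit, and the usual CodeGen headers are assumed to be included, as this interface itself assumes):

class ExampleCodeEmitter : public AMDGPUCodeEmitter {
public:
  // Encode a register or immediate operand; real emitters map registers
  // to hardware encodings instead of using the raw register number.
  virtual uint64_t getMachineOpValue(const MachineInstr &MI,
                                     const MachineOperand &MO) const {
    if (MO.isReg())
      return MO.getReg();
    if (MO.isImm())
      return static_cast<uint64_t>(MO.getImm());
    return 0;
  }
};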
0 //===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass lowers AMDIL machine instructions to the appropriate
11 /// hardware instructions.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPU.h"
16 #include "AMDGPUInstrInfo.h"
17 #include "llvm/CodeGen/MachineFunctionPass.h"
18
19 using namespace llvm;
20
21 namespace {
22
23 class AMDGPUConvertToISAPass : public MachineFunctionPass {
24
25 private:
26 static char ID;
27 TargetMachine &TM;
28
29 public:
30 AMDGPUConvertToISAPass(TargetMachine &tm) :
31 MachineFunctionPass(ID), TM(tm) { }
32
33 virtual bool runOnMachineFunction(MachineFunction &MF);
34
35 virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
36
37 };
38
39 } // End anonymous namespace
40
41 char AMDGPUConvertToISAPass::ID = 0;
42
43 FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
44 return new AMDGPUConvertToISAPass(tm);
45 }
46
47 bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
48   const AMDGPUInstrInfo * TII =
49                       static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
50
51 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
52 BB != BB_E; ++BB) {
53 MachineBasicBlock &MBB = *BB;
54 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
55 I != E; ++I) {
56 MachineInstr &MI = *I;
57 TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
58 }
59 }
60 return false;
61 }
0 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This is the parent TargetLowering class for hardware code gen
11 /// targets.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPUISelLowering.h"
16 #include "AMDILIntrinsicInfo.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
21
22 using namespace llvm;
23
24 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
25 TargetLowering(TM, new TargetLoweringObjectFileELF()) {
26
27 // Initialize target lowering borrowed from AMDIL
28 InitAMDILLowering();
29
30 // We need to custom lower some of the intrinsics
31 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
32
33 // Library functions. These default to Expand, but we have instructions
34 // for them.
35 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
36 setOperationAction(ISD::FEXP2, MVT::f32, Legal);
37 setOperationAction(ISD::FPOW, MVT::f32, Legal);
38 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
39 setOperationAction(ISD::FABS, MVT::f32, Legal);
40 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
41 setOperationAction(ISD::FRINT, MVT::f32, Legal);
42
43 // Lower floating point store/load to integer store/load to reduce the number
44 // of patterns in tablegen.
45 setOperationAction(ISD::STORE, MVT::f32, Promote);
46 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
47
48 setOperationAction(ISD::STORE, MVT::v4f32, Promote);
49 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
50
51 setOperationAction(ISD::LOAD, MVT::f32, Promote);
52 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
53
54 setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
55 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
56
57 setOperationAction(ISD::UDIV, MVT::i32, Expand);
58 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
59 setOperationAction(ISD::UREM, MVT::i32, Expand);
60 }
61
62 //===---------------------------------------------------------------------===//
63 // TargetLowering Callbacks
64 //===---------------------------------------------------------------------===//
65
66 SDValue AMDGPUTargetLowering::LowerFormalArguments(
67 SDValue Chain,
68 CallingConv::ID CallConv,
69 bool isVarArg,
70                                       const SmallVectorImpl<ISD::InputArg> &Ins,
71                                       DebugLoc DL, SelectionDAG &DAG,
72                                       SmallVectorImpl<SDValue> &InVals) const {
73 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
74 InVals.push_back(SDValue());
75 }
76 return Chain;
77 }
78
79 SDValue AMDGPUTargetLowering::LowerReturn(
80 SDValue Chain,
81 CallingConv::ID CallConv,
82 bool isVarArg,
83                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
84                                      const SmallVectorImpl<SDValue> &OutVals,
85 DebugLoc DL, SelectionDAG &DAG) const {
86 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
87 }
88
89 //===---------------------------------------------------------------------===//
90 // Target specific lowering
91 //===---------------------------------------------------------------------===//
92
93 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
94 const {
95 switch (Op.getOpcode()) {
96 default:
97 Op.getNode()->dump();
98       assert(0 && "Custom lowering code for this "
99           "instruction is not implemented yet!");
100 break;
101 // AMDIL DAG lowering
102 case ISD::SDIV: return LowerSDIV(Op, DAG);
103 case ISD::SREM: return LowerSREM(Op, DAG);
104 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
105 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
106 // AMDGPU DAG lowering
107 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
108 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
109 }
110 return Op;
111 }
112
113 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
114 SelectionDAG &DAG) const {
115   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
116 DebugLoc DL = Op.getDebugLoc();
117 EVT VT = Op.getValueType();
118
119 switch (IntrinsicID) {
120 default: return Op;
121 case AMDGPUIntrinsic::AMDIL_abs:
122 return LowerIntrinsicIABS(Op, DAG);
123 case AMDGPUIntrinsic::AMDIL_exp:
124 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
125 case AMDGPUIntrinsic::AMDGPU_lrp:
126 return LowerIntrinsicLRP(Op, DAG);
127 case AMDGPUIntrinsic::AMDIL_fraction:
128 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
129 case AMDGPUIntrinsic::AMDIL_mad:
130 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
131 Op.getOperand(2), Op.getOperand(3));
132 case AMDGPUIntrinsic::AMDIL_max:
133 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
134 Op.getOperand(2));
135 case AMDGPUIntrinsic::AMDGPU_imax:
136 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
137 Op.getOperand(2));
138 case AMDGPUIntrinsic::AMDGPU_umax:
139 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
140 Op.getOperand(2));
141 case AMDGPUIntrinsic::AMDIL_min:
142 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
143 Op.getOperand(2));
144 case AMDGPUIntrinsic::AMDGPU_imin:
145 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
146 Op.getOperand(2));
147 case AMDGPUIntrinsic::AMDGPU_umin:
148 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
149 Op.getOperand(2));
150 case AMDGPUIntrinsic::AMDIL_round_nearest:
151 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
152 }
153 }
154
155 /// IABS(a) = SMAX(sub(0, a), a)
156 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
157 SelectionDAG &DAG) const {
158
159 DebugLoc DL = Op.getDebugLoc();
160 EVT VT = Op.getValueType();
161 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
162 Op.getOperand(1));
163
164 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
165 }
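A scalar reference model of the identity above (with an explicit two's-complement negate so the INT_MIN case wraps the way the hardware sub does, instead of hitting C++ signed-overflow UB):

#include <algorithm>
#include <cstdint>

// IABS(a) = SMAX(sub(0, a), a)
static int32_t iabs(int32_t a) {
  int32_t Neg = (int32_t)(0u - (uint32_t)a); // sub(0, a) with wraparound
  return std::max(Neg, a);                   // SMAX
}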
166
167 /// Linear Interpolation
168 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
169 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
170 SelectionDAG &DAG) const {
171 DebugLoc DL = Op.getDebugLoc();
172 EVT VT = Op.getValueType();
173 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
174 DAG.getConstantFP(1.0f, MVT::f32),
175 Op.getOperand(1));
176 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
177 Op.getOperand(3));
178 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
179 Op.getOperand(2),
180 OneSubAC);
181 }
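In scalar form the node sequence above is the usual linear-interpolation identity; a tiny reference model:

// LRP(a, b, c) = a*b + (1 - a)*c, built above as FSUB + FMUL + MAD.
static float lrp(float a, float b, float c) {
  return a * b + (1.0f - a) * c;
}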
182
183 /// \brief Generate Min/Max node
184 SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
185 SelectionDAG &DAG) const {
186 DebugLoc DL = Op.getDebugLoc();
187 EVT VT = Op.getValueType();
188
189 SDValue LHS = Op.getOperand(0);
190 SDValue RHS = Op.getOperand(1);
191 SDValue True = Op.getOperand(2);
192 SDValue False = Op.getOperand(3);
193 SDValue CC = Op.getOperand(4);
194
195 if (VT != MVT::f32 ||
196 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
197 return SDValue();
198 }
199
200   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
201 switch (CCOpcode) {
202 case ISD::SETOEQ:
203 case ISD::SETONE:
204 case ISD::SETUNE:
205 case ISD::SETNE:
206 case ISD::SETUEQ:
207 case ISD::SETEQ:
208 case ISD::SETFALSE:
209 case ISD::SETFALSE2:
210 case ISD::SETTRUE:
211 case ISD::SETTRUE2:
212 case ISD::SETUO:
213 case ISD::SETO:
214     assert(0 && "Operation should already be optimised!");
215 case ISD::SETULE:
216 case ISD::SETULT:
217 case ISD::SETOLE:
218 case ISD::SETOLT:
219 case ISD::SETLE:
220 case ISD::SETLT: {
221 if (LHS == True)
222 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
223 else
224 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
225 }
226 case ISD::SETGT:
227 case ISD::SETGE:
228 case ISD::SETUGE:
229 case ISD::SETOGE:
230 case ISD::SETUGT:
231 case ISD::SETOGT: {
232 if (LHS == True)
233 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
234 else
235 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
236 }
237 case ISD::SETCC_INVALID:
238     assert(0 && "Invalid setcc condcode!");
239 }
240 return Op;
241 }
242
243
244
245 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
246 SelectionDAG &DAG) const {
247 DebugLoc DL = Op.getDebugLoc();
248 EVT VT = Op.getValueType();
249
250 SDValue Num = Op.getOperand(0);
251 SDValue Den = Op.getOperand(1);
252
253   SmallVector<SDValue, 2> Results;
254
255 // RCP = URECIP(Den) = 2^32 / Den + e
256 // e is rounding error.
257 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
258
259   // RCP_LO = umulo(RCP, Den)
260 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
261
262   // RCP_HI = mulhu(RCP, Den)
263 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
264
265 // NEG_RCP_LO = -RCP_LO
266 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
267 RCP_LO);
268
269 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
270 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
271 NEG_RCP_LO, RCP_LO,
272 ISD::SETEQ);
273 // Calculate the rounding error from the URECIP instruction
274 // E = mulhu(ABS_RCP_LO, RCP)
275 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
276
277 // RCP_A_E = RCP + E
278 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
279
280 // RCP_S_E = RCP - E
281 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
282
283 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
284 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
285 RCP_A_E, RCP_S_E,
286 ISD::SETEQ);
287 // Quotient = mulhu(Tmp0, Num)
288 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
289
290 // Num_S_Remainder = Quotient * Den
291 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
292
293 // Remainder = Num - Num_S_Remainder
294 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
295
296 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
297 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
298 DAG.getConstant(-1, VT),
299 DAG.getConstant(0, VT),
300 ISD::SETGE);
301 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
302 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
303 DAG.getConstant(0, VT),
304 DAG.getConstant(-1, VT),
305 DAG.getConstant(0, VT),
306 ISD::SETGE);
307 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
308 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
309 Remainder_GE_Zero);
310
311 // Calculate Division result:
312
313 // Quotient_A_One = Quotient + 1
314 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
315 DAG.getConstant(1, VT));
316
317 // Quotient_S_One = Quotient - 1
318 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
319 DAG.getConstant(1, VT));
320
321 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
322 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
323 Quotient, Quotient_A_One, ISD::SETEQ);
324
325 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
326 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
327 Quotient_S_One, Div, ISD::SETEQ);
328
329 // Calculate Rem result:
330
331 // Remainder_S_Den = Remainder - Den
332 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
333
334 // Remainder_A_Den = Remainder + Den
335 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
336
337 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
338 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
339 Remainder, Remainder_S_Den, ISD::SETEQ);
340
341 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
342 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
343 Remainder_A_Den, Rem, ISD::SETEQ);
344 SDValue Ops[2];
345 Ops[0] = Div;
346 Ops[1] = Rem;
347 return DAG.getMergeValues(Ops, 2, DL);
348 }
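For readers checking the algorithm, here is a literal scalar transcription of the DAG built above, with a plain truncating 2^32 / Den standing in for the hardware URECIP (whose result may differ by the small rounding error e that the fix-up selects absorb). The SETGE selects are signed compares and are mirrored exactly:

#include <cstdint>

static uint32_t mulhu(uint32_t a, uint32_t b) {
  return (uint32_t)(((uint64_t)a * (uint64_t)b) >> 32);
}

static void udivrem32(uint32_t Num, uint32_t Den, uint32_t &Div, uint32_t &Rem) {
  uint32_t RCP = (uint32_t)(0x100000000ULL / Den);           // URECIP stand-in
  uint32_t RCP_LO = RCP * Den;                               // UMULO: low 32 bits
  uint32_t RCP_HI = mulhu(RCP, Den);
  uint32_t ABS_RCP_LO = (RCP_HI == 0) ? (uint32_t)(0u - RCP_LO) : RCP_LO;
  uint32_t E = mulhu(ABS_RCP_LO, RCP);
  uint32_t Tmp0 = (RCP_HI == 0) ? RCP + E : RCP - E;
  uint32_t Quotient = mulhu(Tmp0, Num);
  uint32_t Num_S_Remainder = Quotient * Den;                 // UMULO again
  uint32_t Remainder = Num - Num_S_Remainder;
  bool Remainder_GE_Den  = (int32_t)Remainder >= (int32_t)Den; // SETGE (signed)
  bool Remainder_GE_Zero = (int32_t)Remainder >= 0;            // SETGE (signed)
  bool Tmp1 = Remainder_GE_Den && Remainder_GE_Zero;
  Div = Tmp1 ? Quotient + 1 : Quotient;          // Quotient_A_One select
  if (!Remainder_GE_Zero) Div = Quotient - 1;    // Quotient_S_One select
  Rem = Tmp1 ? Remainder - Den : Remainder;      // Remainder_S_Den select
  if (!Remainder_GE_Zero) Rem = Remainder + Den; // Remainder_A_Den select
}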
349
350 //===----------------------------------------------------------------------===//
351 // Helper functions
352 //===----------------------------------------------------------------------===//
353
354 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
355   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
356 return CFP->isExactlyValue(1.0);
357 }
358   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
359 return C->isAllOnesValue();
360 }
361 return false;
362 }
363
364 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
365   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
366 return CFP->getValueAPF().isZero();
367 }
368   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
369 return C->isNullValue();
370 }
371 return false;
372 }
373
374 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
375 const TargetRegisterClass *RC,
376 unsigned Reg, EVT VT) const {
377 MachineFunction &MF = DAG.getMachineFunction();
378 MachineRegisterInfo &MRI = MF.getRegInfo();
379 unsigned VirtualRegister;
380 if (!MRI.isLiveIn(Reg)) {
381 VirtualRegister = MRI.createVirtualRegister(RC);
382 MRI.addLiveIn(Reg, VirtualRegister);
383 } else {
384 VirtualRegister = MRI.getLiveInVirtReg(Reg);
385 }
386 return DAG.getRegister(VirtualRegister, VT);
387 }
388
389 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
390
391 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
392 switch (Opcode) {
393 default: return 0;
394 // AMDIL DAG nodes
395 NODE_NAME_CASE(MAD);
396 NODE_NAME_CASE(CALL);
397 NODE_NAME_CASE(UMUL);
398 NODE_NAME_CASE(DIV_INF);
399 NODE_NAME_CASE(RET_FLAG);
400 NODE_NAME_CASE(BRANCH_COND);
401
402 // AMDGPU DAG nodes
403 NODE_NAME_CASE(DWORDADDR)
404 NODE_NAME_CASE(FRACT)
405 NODE_NAME_CASE(FMAX)
406 NODE_NAME_CASE(SMAX)
407 NODE_NAME_CASE(UMAX)
408 NODE_NAME_CASE(FMIN)
409 NODE_NAME_CASE(SMIN)
410 NODE_NAME_CASE(UMIN)
411 NODE_NAME_CASE(URECIP)
412 NODE_NAME_CASE(INTERP)
413 NODE_NAME_CASE(INTERP_P0)
414 NODE_NAME_CASE(EXPORT)
415 }
416 }
0 //===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Interface definition of the TargetLowering class that is common
11 /// to all AMD GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef AMDGPUISELLOWERING_H
16 #define AMDGPUISELLOWERING_H
17
18 #include "llvm/Target/TargetLowering.h"
19
20 namespace llvm {
21
22 class MachineRegisterInfo;
23
24 class AMDGPUTargetLowering : public TargetLowering {
25 private:
26 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
27 SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
28
29 protected:
30
31 /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
32 /// MachineFunction.
33 ///
34 /// \returns a RegisterSDNode representing Reg.
35 SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
36 unsigned Reg, EVT VT) const;
37
38 bool isHWTrueValue(SDValue Op) const;
39 bool isHWFalseValue(SDValue Op) const;
40
41 public:
42 AMDGPUTargetLowering(TargetMachine &TM);
43
44 virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
45 bool isVarArg,
46                                       const SmallVectorImpl<ISD::InputArg> &Ins,
47                                       DebugLoc DL, SelectionDAG &DAG,
48                                       SmallVectorImpl<SDValue> &InVals) const;
49
50 virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
51 bool isVarArg,
52                               const SmallVectorImpl<ISD::OutputArg> &Outs,
53                               const SmallVectorImpl<SDValue> &OutVals,
54 DebugLoc DL, SelectionDAG &DAG) const;
55
56 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
57 SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
58 SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
59 SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
60 virtual const char* getTargetNodeName(unsigned Opcode) const;
61
62 // Functions defined in AMDILISelLowering.cpp
63 public:
64
65 /// \brief Determine which of the bits specified in \p Mask are known to be
66 /// either zero or one and return them in the \p KnownZero and \p KnownOne
67 /// bitsets.
68 virtual void computeMaskedBitsForTargetNode(const SDValue Op,
69 APInt &KnownZero,
70 APInt &KnownOne,
71 const SelectionDAG &DAG,
72 unsigned Depth = 0) const;
73
74 virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
75 const CallInst &I, unsigned Intrinsic) const;
76
77 /// We want to mark f32/f64 floating point values as legal.
78 bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
79
80 /// We don't want to shrink f64/f32 constants.
81 bool ShouldShrinkFPConstant(EVT VT) const;
82
83 private:
84 void InitAMDILLowering();
85 SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
86 SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
87 SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
88 SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
89 SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
90 SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
91 SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
92 SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
93 SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
94 SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
95 EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
96 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
97 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
98 };
99
100 namespace AMDGPUISD {
101
102 enum {
103 // AMDIL ISD Opcodes
104 FIRST_NUMBER = ISD::BUILTIN_OP_END,
105 MAD, // 32bit Fused Multiply Add instruction
106 CALL, // Function call based on a single integer
107 UMUL, // 32bit unsigned multiplication
108 DIV_INF, // Divide with infinity returned on zero divisor
109 RET_FLAG,
110 BRANCH_COND,
111 // End AMDIL ISD Opcodes
112 BITALIGN,
113 DWORDADDR,
114 FRACT,
115 FMAX,
116 SMAX,
117 UMAX,
118 FMIN,
119 SMIN,
120 UMIN,
121 URECIP,
122 INTERP,
123 INTERP_P0,
124 EXPORT,
125 LAST_AMDGPU_ISD_NUMBER
126 };
127
128
129 } // End namespace AMDGPUISD
130
131 namespace SIISD {
132
133 enum {
134 SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
135 VCC_AND,
136 VCC_BITCAST
137 };
138
139 } // End namespace SIISD
140
141 } // End namespace llvm
142
143 #endif // AMDGPUISELLOWERING_H
0 //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Implementation of the TargetInstrInfo class that is common to all
11 /// AMD GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPURegisterInfo.h"
17 #include "AMDGPUTargetMachine.h"
18 #include "AMDIL.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22
23 #define GET_INSTRINFO_CTOR
24 #include "AMDGPUGenInstrInfo.inc"
25
26 using namespace llvm;
27
28 AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
29 : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
30
31 const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
32 return RI;
33 }
34
35 bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
36 unsigned &SrcReg, unsigned &DstReg,
37 unsigned &SubIdx) const {
38 // TODO: Implement this function
39 return false;
40 }
41
42 unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
43 int &FrameIndex) const {
44 // TODO: Implement this function
45 return 0;
46 }
47
48 unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
49 int &FrameIndex) const {
50 // TODO: Implement this function
51 return 0;
52 }
53
54 bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
55 const MachineMemOperand *&MMO,
56 int &FrameIndex) const {
57 // TODO: Implement this function
58 return false;
59 }
60 unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
61 int &FrameIndex) const {
62 // TODO: Implement this function
63 return 0;
64 }
65 unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
66 int &FrameIndex) const {
67 // TODO: Implement this function
68 return 0;
69 }
70 bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
71 const MachineMemOperand *&MMO,
72 int &FrameIndex) const {
73 // TODO: Implement this function
74 return false;
75 }
76
77 MachineInstr *
78 AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
79 MachineBasicBlock::iterator &MBBI,
80 LiveVariables *LV) const {
81 // TODO: Implement this function
82 return NULL;
83 }
84 bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
85 MachineBasicBlock &MBB) const {
86 while (iter != MBB.end()) {
87 switch (iter->getOpcode()) {
88 default:
89 break;
90 case AMDGPU::BRANCH_COND_i32:
91 case AMDGPU::BRANCH_COND_f32:
92 case AMDGPU::BRANCH:
93 return true;
94 };
95 ++iter;
96 }
97 return false;
98 }
99
100 MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
101 MachineBasicBlock::iterator tmp = MBB->end();
102 if (!MBB->size()) {
103 return MBB->end();
104 }
105 while (--tmp) {
106 if (tmp->getOpcode() == AMDGPU::ENDLOOP
107 || tmp->getOpcode() == AMDGPU::ENDIF
108 || tmp->getOpcode() == AMDGPU::ELSE) {
109 if (tmp == MBB->begin()) {
110 return tmp;
111 } else {
112 continue;
113 }
114 } else {
115 return ++tmp;
116 }
117 }
118 return MBB->end();
119 }
120
121 void
122 AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
123 MachineBasicBlock::iterator MI,
124 unsigned SrcReg, bool isKill,
125 int FrameIndex,
126 const TargetRegisterClass *RC,
127 const TargetRegisterInfo *TRI) const {
128 assert(!"Not Implemented");
129 }
130
131 void
132 AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator MI,
134 unsigned DestReg, int FrameIndex,
135 const TargetRegisterClass *RC,
136 const TargetRegisterInfo *TRI) const {
137 assert(!"Not Implemented");
138 }
139
140 MachineInstr *
141 AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
142 MachineInstr *MI,
143                                     const SmallVectorImpl<unsigned> &Ops,
144 int FrameIndex) const {
145 // TODO: Implement this function
146 return 0;
147 }
148 MachineInstr*
149 AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
150 MachineInstr *MI,
151                                     const SmallVectorImpl<unsigned> &Ops,
152 MachineInstr *LoadMI) const {
153 // TODO: Implement this function
154 return 0;
155 }
156 bool
157 AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
158                                    const SmallVectorImpl<unsigned> &Ops) const {
159 // TODO: Implement this function
160 return false;
161 }
162 bool
163 AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
164 unsigned Reg, bool UnfoldLoad,
165 bool UnfoldStore,
166                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
167 // TODO: Implement this function
168 return false;
169 }
170
171 bool
172 AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
173                                   SmallVectorImpl<SDNode*> &NewNodes) const {
174 // TODO: Implement this function
175 return false;
176 }
177
178 unsigned
179 AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
180 bool UnfoldLoad, bool UnfoldStore,
181 unsigned *LoadRegIndex) const {
182 // TODO: Implement this function
183 return 0;
184 }
185
186 bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
187 int64_t Offset1, int64_t Offset2,
188 unsigned NumLoads) const {
189 assert(Offset2 > Offset1
190 && "Second offset should be larger than first offset!");
191 // If we have less than 16 loads in a row, and the offsets are within 16,
192 // then schedule together.
193 // TODO: Make the loads schedule near if it fits in a cacheline
194 return (NumLoads < 16 && (Offset2 - Offset1) < 16);
195 }
196
197 bool
198 AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
199 const {
200 // TODO: Implement this function
201 return true;
202 }
203 void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
204 MachineBasicBlock::iterator MI) const {
205 // TODO: Implement this function
206 }
207
208 bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
209 // TODO: Implement this function
210 return false;
211 }
212 bool
213 AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
214                                    const SmallVectorImpl<MachineOperand> &Pred2)
215 const {
216 // TODO: Implement this function
217 return false;
218 }
219
220 bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
221                                        std::vector<MachineOperand> &Pred) const {
222 // TODO: Implement this function
223 return false;
224 }
225
226 bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
227 // TODO: Implement this function
228 return MI->getDesc().isPredicable();
229 }
230
231 bool
232 AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
233 // TODO: Implement this function
234 return true;
235 }
236
237 void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
238 DebugLoc DL) const {
239 MachineRegisterInfo &MRI = MF.getRegInfo();
240 const AMDGPURegisterInfo & RI = getRegisterInfo();
241
242 for (unsigned i = 0; i < MI.getNumOperands(); i++) {
243 MachineOperand &MO = MI.getOperand(i);
244 // Convert dst regclass to one that is supported by the ISA
245 if (MO.isReg() && MO.isDef()) {
246 if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
247 const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
248 const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
249
250 assert(newRegClass);
251
252 MRI.setRegClass(MO.getReg(), newRegClass);
253 }
254 }
255 }
256 }
0 //===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Contains the definition of a TargetInstrInfo class that is common
11 /// to all AMD GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef AMDGPUINSTRUCTIONINFO_H
16 #define AMDGPUINSTRUCTIONINFO_H
17
18 #include "AMDGPURegisterInfo.h"
19 #include "AMDGPUInstrInfo.h"
20 #include "llvm/Target/TargetInstrInfo.h"
21
22 #include <map>
23
24 #define GET_INSTRINFO_HEADER
25 #define GET_INSTRINFO_ENUM
26 #include "AMDGPUGenInstrInfo.inc"
27
28 #define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
29 #define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
30 #define OPCODE_IS_ZERO AMDGPU::PRED_SETE
31 #define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
32
33 namespace llvm {
34
35 class AMDGPUTargetMachine;
36 class MachineFunction;
37 class MachineInstr;
38 class MachineInstrBuilder;
39
40 class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
41 private:
42 const AMDGPURegisterInfo RI;
43 TargetMachine &TM;
44 bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
45 MachineBasicBlock &MBB) const;
46 public:
47 explicit AMDGPUInstrInfo(TargetMachine &tm);
48
49 virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
50
51 bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
52 unsigned &DstReg, unsigned &SubIdx) const;
53
54 unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
55 unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
56 int &FrameIndex) const;
57 bool hasLoadFromStackSlot(const MachineInstr *MI,
58 const MachineMemOperand *&MMO,
59 int &FrameIndex) const;
60 unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
61 unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
62 int &FrameIndex) const;
63 bool hasStoreFromStackSlot(const MachineInstr *MI,
64 const MachineMemOperand *&MMO,
65 int &FrameIndex) const;
66
67 MachineInstr *
68 convertToThreeAddress(MachineFunction::iterator &MFI,
69 MachineBasicBlock::iterator &MBBI,
70 LiveVariables *LV) const;
71
72
73 virtual void copyPhysReg(MachineBasicBlock &MBB,
74 MachineBasicBlock::iterator MI, DebugLoc DL,
75 unsigned DestReg, unsigned SrcReg,
76 bool KillSrc) const = 0;
77
78 void storeRegToStackSlot(MachineBasicBlock &MBB,
79 MachineBasicBlock::iterator MI,
80 unsigned SrcReg, bool isKill, int FrameIndex,
81 const TargetRegisterClass *RC,
82 const TargetRegisterInfo *TRI) const;
83 void loadRegFromStackSlot(MachineBasicBlock &MBB,
84 MachineBasicBlock::iterator MI,
85 unsigned DestReg, int FrameIndex,
86 const TargetRegisterClass *RC,
87 const TargetRegisterInfo *TRI) const;
88
89 protected:
90 MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
91 MachineInstr *MI,
92                                       const SmallVectorImpl<unsigned> &Ops,
93 int FrameIndex) const;
94 MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
95 MachineInstr *MI,
96                                       const SmallVectorImpl<unsigned> &Ops,
97 MachineInstr *LoadMI) const;
98 public:
99 bool canFoldMemoryOperand(const MachineInstr *MI,
100                             const SmallVectorImpl<unsigned> &Ops) const;
101 bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
102 unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
103                            SmallVectorImpl<MachineInstr*> &NewMIs) const;
104 bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
105                            SmallVectorImpl<SDNode*> &NewNodes) const;
106 unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
107 bool UnfoldLoad, bool UnfoldStore,
108 unsigned *LoadRegIndex = 0) const;
109 bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
110 int64_t Offset1, int64_t Offset2,
111 unsigned NumLoads) const;
112
113   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
114 void insertNoop(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator MI) const;
116 bool isPredicated(const MachineInstr *MI) const;
117   bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
118                          const SmallVectorImpl<MachineOperand> &Pred2) const;
119 bool DefinesPredicate(MachineInstr *MI,
120                         std::vector<MachineOperand> &Pred) const;
121 bool isPredicable(MachineInstr *MI) const;
122 bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
123
124 // Helper functions that check the opcode for status information
125 bool isLoadInst(llvm::MachineInstr *MI) const;
126 bool isExtLoadInst(llvm::MachineInstr *MI) const;
127 bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
128 bool isSExtLoadInst(llvm::MachineInstr *MI) const;
129 bool isZExtLoadInst(llvm::MachineInstr *MI) const;
130 bool isAExtLoadInst(llvm::MachineInstr *MI) const;
131 bool isStoreInst(llvm::MachineInstr *MI) const;
132 bool isTruncStoreInst(llvm::MachineInstr *MI) const;
133
134 virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
135 int64_t Imm) const = 0;
136 virtual unsigned getIEQOpcode() const = 0;
137 virtual bool isMov(unsigned opcode) const = 0;
138
139 /// \brief Convert the AMDIL MachineInstr to a supported ISA
140 /// MachineInstr
141 virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
142 DebugLoc DL) const;
143
144 };
145
146 } // End llvm namespace
147
148 #endif // AMDGPUINSTRUCTIONINFO_H
0 //===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains DAG node definitions for the AMDGPU target.
10 //
11 //===----------------------------------------------------------------------===//
12
13 //===----------------------------------------------------------------------===//
14 // AMDGPU DAG Profiles
15 //===----------------------------------------------------------------------===//
16
17 def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
18 SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
19 ]>;
20
21 //===----------------------------------------------------------------------===//
22 // AMDGPU DAG Nodes
23 //
24
25 // out = ((a << 32) | b) >> c
26 //
27 // Can be used to optimize rotl:
28 // rotl(a, b) = bitalign(a, a, 32 - b)
29 def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
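A scalar model of the node and of the rotl identity in the comment above (assuming the shift amount is taken modulo 32, as GPU shifts do):

#include <cstdint>

// out = (((uint64_t)a << 32) | b) >> c, keeping the low 32 bits.
static uint32_t bitalign(uint32_t a, uint32_t b, uint32_t c) {
  return (uint32_t)(((((uint64_t)a) << 32) | b) >> (c & 31));
}

// rotl(a, b) = bitalign(a, a, 32 - b)
static uint32_t rotl(uint32_t a, uint32_t b) {
  return bitalign(a, a, (32 - b) & 31);
}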
30
31 // The argument to this node is a dword address.
32 def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
33
34 // out = a - floor(a)
35 def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
36
37 // out = max(a, b) a and b are floats
38 def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
39 [SDNPCommutative, SDNPAssociative]
40 >;
41
42 // out = max(a, b) a and b are signed ints
43 def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
44 [SDNPCommutative, SDNPAssociative]
45 >;
46
47 // out = max(a, b) a and b are unsigned ints
48 def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
49 [SDNPCommutative, SDNPAssociative]
50 >;
51
52 // out = min(a, b) a and b are floats
53 def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
54 [SDNPCommutative, SDNPAssociative]
55 >;
56
57 // out = min(a, b) a and b are signed ints
58 def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
59 [SDNPCommutative, SDNPAssociative]
60 >;
61
62 // out = min(a, b) a and b are unsigned ints
63 def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
64 [SDNPCommutative, SDNPAssociative]
65 >;
66
67 // urecip - This operation is a helper for integer division; it returns the
68 //   result of 1 / a as a fractional unsigned integer.
69 // out = (2^32 / a) + e
70 // e is rounding error
71 def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
72
73 def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
0 //===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains instruction defs that are common to all hw codegen
10 // targets.
11 //
12 //===----------------------------------------------------------------------===//
13
14 class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
15 field bits<16> AMDILOp = 0;
16 field bits<3> Gen = 0;
17
18 let Namespace = "AMDGPU";
19 let OutOperandList = outs;
20 let InOperandList = ins;
21 let AsmString = asm;
22 let Pattern = pattern;
23 let Itinerary = NullALU;
24 let TSFlags{42-40} = Gen;
25 let TSFlags{63-48} = AMDILOp;
26 }
27
28 class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
29     : AMDGPUInst <outs, ins, asm, pattern> {
30
31 field bits<32> Inst = 0xffffffff;
32
33 }
34
35 def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
36
37 def COND_EQ : PatLeaf <
38 (cond),
39 [{switch(N->get()){{default: return false;
40 case ISD::SETOEQ: case ISD::SETUEQ:
41 case ISD::SETEQ: return true;}}}]
42 >;
43
44 def COND_NE : PatLeaf <
45 (cond),
46 [{switch(N->get()){{default: return false;
47 case ISD::SETONE: case ISD::SETUNE:
48 case ISD::SETNE: return true;}}}]
49 >;
50 def COND_GT : PatLeaf <
51 (cond),
52 [{switch(N->get()){{default: return false;
53 case ISD::SETOGT: case ISD::SETUGT:
54 case ISD::SETGT: return true;}}}]
55 >;
56
57 def COND_GE : PatLeaf <
58 (cond),
59 [{switch(N->get()){{default: return false;
60 case ISD::SETOGE: case ISD::SETUGE:
61 case ISD::SETGE: return true;}}}]
62 >;
63
64 def COND_LT : PatLeaf <
65 (cond),
66 [{switch(N->get()){{default: return false;
67 case ISD::SETOLT: case ISD::SETULT:
68 case ISD::SETLT: return true;}}}]
69 >;
70
71 def COND_LE : PatLeaf <
72 (cond),
73 [{switch(N->get()){{default: return false;
74 case ISD::SETOLE: case ISD::SETULE:
75 case ISD::SETLE: return true;}}}]
76 >;
77
78 //===----------------------------------------------------------------------===//
79 // Load/Store Pattern Fragments
80 //===----------------------------------------------------------------------===//
81
82 def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
83     return isGlobalLoad(dyn_cast<LoadSDNode>(N));
84 }]>;
85
86 class Constants {
87 int TWO_PI = 0x40c90fdb;
88 int PI = 0x40490fdb;
89 int TWO_PI_INV = 0x3e22f983;
90 }
91 def CONST : Constants;
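These values are IEEE-754 single-precision bit patterns stored as ints so they can be used as literal instruction operands; a quick way to verify the claim:

#include <cstdint>
#include <cstring>

static float bitsToFloat(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof(F)); // reinterpret the bit pattern
  return F;
}
// bitsToFloat(0x40c90fdb) == 6.2831855f   (TWO_PI)
// bitsToFloat(0x40490fdb) == 3.1415927f   (PI)
// bitsToFloat(0x3e22f983) == 0.15915494f  (TWO_PI_INV)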
92
93 def FP_ZERO : PatLeaf <
94 (fpimm),
95 [{return N->getValueAPF().isZero();}]
96 >;
97
98 def FP_ONE : PatLeaf <
99 (fpimm),
100 [{return N->isExactlyValue(1.0);}]
101 >;
102
103 let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
104
105 class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
106 (outs rc:$dst),
107 (ins rc:$src0),
108 "CLAMP $dst, $src0",
109 [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
110 >;
111
112 class FABS <RegisterClass rc> : AMDGPUShaderInst <
113 (outs rc:$dst),
114 (ins rc:$src0),
115 "FABS $dst, $src0",
116 [(set rc:$dst, (fabs rc:$src0))]
117 >;
118
119 class FNEG <RegisterClass rc> : AMDGPUShaderInst <
120 (outs rc:$dst),
121 (ins rc:$src0),
122 "FNEG $dst, $src0",
123 [(set rc:$dst, (fneg rc:$src0))]
124 >;
125
126 def SHADER_TYPE : AMDGPUShaderInst <
127 (outs),
128 (ins i32imm:$type),
129 "SHADER_TYPE $type",
130 [(int_AMDGPU_shader_type imm:$type)]
131 >;
132
133 } // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
134
135 /* Generic helper patterns for intrinsics */
136 /* -------------------------------------- */
137
138 class POW_Common <AMDGPUShaderInst log_ieee, AMDGPUShaderInst exp_ieee,
139                   AMDGPUShaderInst mul, RegisterClass rc> : Pat <
140 (fpow rc:$src0, rc:$src1),
141 (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
142 >;
143
144 /* Other helper patterns */
145 /* --------------------- */
146
147 /* Extract element pattern */
148 class Extract_Element <ValueType sub_type, ValueType vec_type,
149                        RegisterClass vec_class, int sub_idx,
150                        SubRegIndex sub_reg>: Pat<
151 (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
152 (EXTRACT_SUBREG vec_class:$src, sub_reg)
153 >;
154
155 /* Insert element pattern */
156 class Insert_Element <ValueType elem_type, ValueType vec_type,
157                       RegisterClass elem_class, RegisterClass vec_class,
158                       int sub_idx, SubRegIndex sub_reg> : Pat <
159
160 (vec_type (vector_insert (vec_type vec_class:$vec),
161 (elem_type elem_class:$elem), sub_idx)),
162 (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
163 >;
164
165 // Vector Build pattern
166 class Vector_Build <ValueType vecType,
167                     ValueType elemType, RegisterClass elemClass> : Pat <
168 (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
169 (elemType elemClass:$z), (elemType elemClass:$w))),
170 (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
171 (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
172 elemClass:$z, sel_z), elemClass:$w, sel_w)
173 >;
174
175 // bitconvert pattern
176 class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
177 (dt (bitconvert (st rc:$src0))),
178 (dt rc:$src0)
179 >;
180
181 class DwordAddrPat <ValueType vt, RegisterClass rc> : Pat <
182 (vt (AMDGPUdwordaddr (vt rc:$addr))),
183 (vt rc:$addr)
184 >;
185
186 include "R600Instructions.td"
187
188 include "SIInstrInfo.td"
189
0 //===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines intrinsics that are used by all hw codegen targets.
10 //
11 //===----------------------------------------------------------------------===//
12
13 let TargetPrefix = "AMDGPU", isTarget = 1 in {
14
15 def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
16 def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
17 def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
18 def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
19 def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
20
21 def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
22 def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
23 def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
24 def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
25 def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
26 def int_AMDGPU_kilp : Intrinsic<[], [], []>;
27 def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
28 def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
29 def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
30 def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
31 def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
32 def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
33 def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
34 def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
35 def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
36 def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
37 def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
38 def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
39 def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
40 def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
41 def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
42 def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
43 def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
44 def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
45 def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
46 def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
47 def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
48 def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
49 def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
50 def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
51 def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
52
53 def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
54 }
55
56 let TargetPrefix = "TGSI", isTarget = 1 in {
57
58 def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
59 }
60
61 include "SIIntrinsics.td"
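
By TableGen convention, the record names above determine the intrinsic names visible in IR: the int_ prefix becomes llvm. and remaining underscores become dots, so int_AMDGPU_dp4 defines llvm.AMDGPU.dp4. A standalone sketch of that mapping (illustrative only, not part of this patch):

#include <iostream>
#include <string>

// Sketch of the TableGen naming convention: "int_AMDGPU_dp4" -> "llvm.AMDGPU.dp4".
std::string intrinsicName(const std::string &Record) {
  std::string Name = "llvm." + Record.substr(4);  // drop the "int_" prefix
  for (std::string::size_type i = 5; i < Name.size(); ++i)
    if (Name[i] == '_')
      Name[i] = '.';                              // underscores become dots
  return Name;
}

int main() {
  std::cout << intrinsicName("int_AMDGPU_dp4") << "\n";        // llvm.AMDGPU.dp4
  std::cout << intrinsicName("int_r600_read_tidig_x") << "\n"; // llvm.r600.read.tidig.x
  return 0;
}
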
0 //===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14
15 #include "AMDGPUMCInstLower.h"
16 #include "AMDGPUAsmPrinter.h"
17 #include "R600InstrInfo.h"
18 #include "llvm/CodeGen/MachineBasicBlock.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/Constants.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCStreamer.h"
23 #include "llvm/Support/ErrorHandling.h"
24
25 using namespace llvm;
26
27 AMDGPUMCInstLower::AMDGPUMCInstLower() { }
28
29 void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
30 OutMI.setOpcode(MI->getOpcode());
31
32 for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
33 const MachineOperand &MO = MI->getOperand(i);
34
35 MCOperand MCOp;
36 switch (MO.getType()) {
37 default:
38 llvm_unreachable("unknown operand type");
39 case MachineOperand::MO_FPImmediate: {
40 const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
41 assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
42 "Only floating point immediates are supported at the moment.");
43 MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
44 break;
45 }
46 case MachineOperand::MO_Immediate:
47 MCOp = MCOperand::CreateImm(MO.getImm());
48 break;
49 case MachineOperand::MO_Register:
50 MCOp = MCOperand::CreateReg(MO.getReg());
51 break;
52 }
53 OutMI.addOperand(MCOp);
54 }
55 }
56
57 void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
58 AMDGPUMCInstLower MCInstLowering;
59
60 if (MI->isBundle()) {
61 const MachineBasicBlock *MBB = MI->getParent();
62 MachineBasicBlock::const_instr_iterator I = MI;
63 ++I;
64 while (I != MBB->end() && I->isInsideBundle()) {
65 MCInst MCBundleInst;
66 const MachineInstr *BundledInst = I;
67 MCInstLowering.lower(BundledInst, MCBundleInst);
68 OutStreamer.EmitInstruction(MCBundleInst);
69 ++I;
70 }
71 } else {
72 MCInst TmpInst;
73 MCInstLowering.lower(MI, TmpInst);
74 OutStreamer.EmitInstruction(TmpInst);
75 }
76 }
0 //===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9
10 #ifndef AMDGPU_MCINSTLOWER_H
11 #define AMDGPU_MCINSTLOWER_H
12
13 namespace llvm {
14
15 class MCInst;
16 class MachineInstr;
17
18 class AMDGPUMCInstLower {
19
20 public:
21 AMDGPUMCInstLower();
22
23 /// \brief Lower a MachineInstr to an MCInst
24 void lower(const MachineInstr *MI, MCInst &OutMI) const;
25
26 };
27
28 } // End namespace llvm
29
30 #endif //AMDGPU_MCINSTLOWER_H
0 //===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPURegisterInfo.h"
15 #include "AMDGPUTargetMachine.h"
16
17 using namespace llvm;
18
19 AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
20 const TargetInstrInfo &tii)
21 : AMDGPUGenRegisterInfo(0),
22 TM(tm),
23 TII(tii)
24 { }
25
26 //===----------------------------------------------------------------------===//
27 // Function handling callbacks - Functions are a seldom used feature of GPUs, so
28 // they are not supported at this time.
29 //===----------------------------------------------------------------------===//
30
31 const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
32
33 const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
34 const {
35 return &CalleeSavedReg;
36 }
37
38 void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
39 int SPAdj,
40 RegScavenger *RS) const {
41 assert(!"Subroutines not supported yet");
42 }
43
44 unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
45 assert(!"Subroutines not supported yet");
46 return 0;
47 }
48
49 #define GET_REGINFO_TARGET_DESC
50 #include "AMDGPUGenRegisterInfo.inc"
0 //===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief TargetRegisterInfo interface that is implemented by all hw codegen
11 /// targets.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef AMDGPUREGISTERINFO_H
16 #define AMDGPUREGISTERINFO_H
17
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/Target/TargetRegisterInfo.h"
20
21 #define GET_REGINFO_HEADER
22 #define GET_REGINFO_ENUM
23 #include "AMDGPUGenRegisterInfo.inc"
24
25 namespace llvm {
26
27 class AMDGPUTargetMachine;
28 class TargetInstrInfo;
29
30 struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
31 TargetMachine &TM;
32 const TargetInstrInfo &TII;
33 static const uint16_t CalleeSavedReg;
34
35 AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
36
37 virtual BitVector getReservedRegs(const MachineFunction &MF) const {
38 assert(!"Unimplemented"); return BitVector();
39 }
40
41 /// \param RC is an AMDIL reg class.
42 ///
43 /// \returns The ISA reg class that is equivalent to \p RC.
44 virtual const TargetRegisterClass * getISARegClass(
45 const TargetRegisterClass * RC) const {
46 assert(!"Unimplemented"); return NULL;
47 }
48
49 virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
50 assert(!"Unimplemented"); return NULL;
51 }
52
53 const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
54 void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
55 RegScavenger *RS) const;
56 unsigned getFrameRegister(const MachineFunction &MF) const;
57
58 };
59
60 } // End namespace llvm
61
62 #endif // AMDGPUREGISTERINFO_H
0 //===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Tablegen register definitions common to all hw codegen targets.
10 //
11 //===----------------------------------------------------------------------===//
12
13 let Namespace = "AMDGPU" in {
14 def sel_x : SubRegIndex;
15 def sel_y : SubRegIndex;
16 def sel_z : SubRegIndex;
17 def sel_w : SubRegIndex;
18 }
19
20 include "R600RegisterInfo.td"
21 include "SIRegisterInfo.td"
0 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUSubtarget.h"
15
16 using namespace llvm;
17
18 #define GET_SUBTARGETINFO_ENUM
19 #define GET_SUBTARGETINFO_TARGET_DESC
20 #define GET_SUBTARGETINFO_CTOR
21 #include "AMDGPUGenSubtargetInfo.inc"
22
23 AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
24 AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
25 InstrItins = getInstrItineraryForCPU(CPU);
26
27 memset(CapsOverride, 0, sizeof(*CapsOverride)
28 * AMDGPUDeviceInfo::MaxNumberCapabilities);
29 // Default card
30 StringRef GPU = CPU;
31 Is64bit = false;
32 DefaultSize[0] = 64;
33 DefaultSize[1] = 1;
34 DefaultSize[2] = 1;
35 ParseSubtargetFeatures(GPU, FS);
36 DevName = GPU;
37 Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
38 }
39
40 AMDGPUSubtarget::~AMDGPUSubtarget() {
41 delete Device;
42 }
43
44 bool
45 AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
46 assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
47 "Caps index is out of bounds!");
48 return CapsOverride[caps];
49 }
50 bool
51 AMDGPUSubtarget::is64bit() const {
52 return Is64bit;
53 }
54 bool
55 AMDGPUSubtarget::isTargetELF() const {
56 return false;
57 }
58 size_t
59 AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
60 if (dim > 2) { // DefaultSize has three entries; higher dims default to 1.
61 return 1;
62 } else {
63 return DefaultSize[dim];
64 }
65 }
66
67 std::string
68 AMDGPUSubtarget::getDataLayout() const {
69 if (!Device) {
70 return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
71 "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
72 "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
73 "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
74 "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
75 }
76 return Device->getDataLayout();
77 }
78
79 std::string
80 AMDGPUSubtarget::getDeviceName() const {
81 return DevName;
82 }
83 const AMDGPUDevice *
84 AMDGPUSubtarget::device() const {
85 return Device;
86 }
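
The constructor above funnels feature-string parsing into the CapsOverride table that isOverride() reads back. A standalone sketch of this pattern with toy names (the real feature wiring is the TableGen SubtargetFeature records in AMDIL.td):

#include <bitset>
#include <cassert>
#include <iostream>
#include <string>

enum Caps { DoubleOps, ByteStores, Images, MaxNumberCapabilities };

struct ToySubtarget {
  std::bitset<MaxNumberCapabilities> CapsOverride;

  // Feature strings flip bits, as the generated ParseSubtargetFeatures does.
  void parseFeature(const std::string &F) {
    if (F == "+fp64")   CapsOverride.set(DoubleOps);
    if (F == "+images") CapsOverride.set(Images);
  }

  // isOverride() is then a bounds-checked table lookup.
  bool isOverride(Caps C) const {
    assert(C < MaxNumberCapabilities && "Caps index is out of bounds!");
    return CapsOverride.test(C);
  }
};

int main() {
  ToySubtarget ST;
  ST.parseFeature("+fp64");
  std::cout << ST.isOverride(DoubleOps) << ST.isOverride(Images) << "\n"; // 10
  return 0;
}
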
0 //=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef AMDGPUSUBTARGET_H
15 #define AMDGPUSUBTARGET_H
16 #include "AMDILDevice.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Target/TargetSubtargetInfo.h"
20
21 #define GET_SUBTARGETINFO_HEADER
22 #include "AMDGPUGenSubtargetInfo.inc"
23
24 #define MAX_CB_SIZE (1 << 16)
25
26 namespace llvm {
27
28 class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
29 private:
30 bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
31 const AMDGPUDevice *Device;
32 size_t DefaultSize[3];
33 std::string DevName;
34 bool Is64bit;
35 bool Is32on64bit;
36 bool DumpCode;
37 bool R600ALUInst;
38
39 InstrItineraryData InstrItins;
40
41 public:
42 AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
43 virtual ~AMDGPUSubtarget();
44
45 const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
46 virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
47
48 bool isOverride(AMDGPUDeviceInfo::Caps) const;
49 bool is64bit() const;
50
51 // Helper functions to simplify if statements
52 bool isTargetELF() const;
53 const AMDGPUDevice* device() const;
54 std::string getDataLayout() const;
55 std::string getDeviceName() const;
56 virtual size_t getDefaultSize(uint32_t dim) const;
57 bool dumpCode() const { return DumpCode; }
58 bool r600ALUEncoding() const { return R600ALUInst; }
59
60 };
61
62 } // End namespace llvm
63
64 #endif // AMDGPUSUBTARGET_H
0 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for R600 and SI GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "R600ISelLowering.h"
18 #include "R600InstrInfo.h"
19 #include "SIISelLowering.h"
20 #include "SIInstrInfo.h"
21 #include "llvm/Analysis/Passes.h"
22 #include "llvm/Analysis/Verifier.h"
23 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/Passes.h"
26 #include "llvm/MC/MCAsmInfo.h"
27 #include "llvm/PassManager.h"
28 #include "llvm/Support/TargetRegistry.h"
29 #include "llvm/Support/raw_os_ostream.h"
30 #include "llvm/Transforms/IPO.h"
31 #include "llvm/Transforms/Scalar.h"
32 #include <llvm/CodeGen/Passes.h>
33
34 using namespace llvm;
35
36 extern "C" void LLVMInitializeR600Target() {
37 // Register the target
38 RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
39 }
40
41 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
42 StringRef CPU, StringRef FS,
43 TargetOptions Options,
44 Reloc::Model RM, CodeModel::Model CM,
45 CodeGenOpt::Level OptLevel
46 )
47 :
48 LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
49 Subtarget(TT, CPU, FS),
50 Layout(Subtarget.getDataLayout()),
51 FrameLowering(TargetFrameLowering::StackGrowsUp,
52 Subtarget.device()->getStackAlignment(), 0),
53 IntrinsicInfo(this),
54 InstrItins(&Subtarget.getInstrItineraryData()) {
55 // TLInfo uses InstrInfo so it must be initialized after.
56 if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
57 InstrInfo = new R600InstrInfo(*this);
58 TLInfo = new R600TargetLowering(*this);
59 } else {
60 InstrInfo = new SIInstrInfo(*this);
61 TLInfo = new SITargetLowering(*this);
62 }
63 }
64
65 AMDGPUTargetMachine::~AMDGPUTargetMachine() {
66 }
67
68 namespace {
69 class AMDGPUPassConfig : public TargetPassConfig {
70 public:
71 AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
72 : TargetPassConfig(TM, PM) {}
73
74 AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
75 return getTM<AMDGPUTargetMachine>();
76 }
77
78 virtual bool addPreISel();
79 virtual bool addInstSelector();
80 virtual bool addPreRegAlloc();
81 virtual bool addPostRegAlloc();
82 virtual bool addPreSched2();
83 virtual bool addPreEmitPass();
84 };
85 } // End of anonymous namespace
86
87 TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
88 return new AMDGPUPassConfig(this, PM);
89 }
90
91 bool
92 AMDGPUPassConfig::addPreISel() {
93 return false;
94 }
95
96 bool AMDGPUPassConfig::addInstSelector() {
97 addPass(createAMDGPUPeepholeOpt(*TM));
98 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
99 return false;
100 }
101
102 bool AMDGPUPassConfig::addPreRegAlloc() {
103 const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
104
105 if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
106 addPass(createSIAssignInterpRegsPass(*TM));
107 }
108 addPass(createAMDGPUConvertToISAPass(*TM));
109 if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
110 addPass(createSIFixSGPRLivenessPass(*TM));
111 }
112 return false;
113 }
114
115 bool AMDGPUPassConfig::addPostRegAlloc() {
116 return false;
117 }
118
119 bool AMDGPUPassConfig::addPreSched2() {
120
121 addPass(&IfConverterID);
122 return false;
123 }
124
125 bool AMDGPUPassConfig::addPreEmitPass() {
126 addPass(createAMDGPUCFGPreparationPass(*TM));
127 addPass(createAMDGPUCFGStructurizerPass(*TM));
128
129 const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
130 if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
131 addPass(createR600ExpandSpecialInstrsPass(*TM));
132 addPass(&FinalizeMachineBundlesID);
133 } else {
134 addPass(createSILowerLiteralConstantsPass(*TM));
135 addPass(createSILowerControlFlowPass(*TM));
136 }
137
138 return false;
139 }
140
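
Both the constructor and the pass callbacks above branch the same way: R600-class components for HD6XXX and earlier, SI-class for anything newer. A standalone sketch of that single dispatch point (toy classes, not the backend's):

#include <iostream>

struct InstrInfoToy { virtual ~InstrInfoToy() {} virtual const char *name() const = 0; };
struct R600Toy : InstrInfoToy { const char *name() const { return "R600"; } };
struct SIToy   : InstrInfoToy { const char *name() const { return "SI"; } };

enum Generation { HD4XXX, HD5XXX, HD6XXX, HD7XXX };

// Generation <= HD6XXX selects the R600 flavor; later chips get SI.
InstrInfoToy *makeInstrInfo(Generation G) {
  if (G <= HD6XXX)
    return new R600Toy();
  return new SIToy();
}

int main() {
  InstrInfoToy *II = makeInstrInfo(HD5XXX);
  std::cout << II->name() << "\n"; // R600
  delete II;
  II = makeInstrInfo(HD7XXX);
  std::cout << II->name() << "\n"; // SI
  delete II;
  return 0;
}
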
0 //===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef AMDGPU_TARGET_MACHINE_H
15 #define AMDGPU_TARGET_MACHINE_H
16
17 #include "AMDGPUInstrInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "AMDILFrameLowering.h"
20 #include "AMDILIntrinsicInfo.h"
21 #include "R600ISelLowering.h"
22 #include "llvm/ADT/OwningPtr.h"
23 #include "llvm/DataLayout.h"
24
25 namespace llvm {
26
27 MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
28
29 class AMDGPUTargetMachine : public LLVMTargetMachine {
30
31 AMDGPUSubtarget Subtarget;
32 const DataLayout Layout;
33 AMDGPUFrameLowering FrameLowering;
34 AMDGPUIntrinsicInfo IntrinsicInfo;
35 const AMDGPUInstrInfo * InstrInfo;
36 AMDGPUTargetLowering * TLInfo;
37 const InstrItineraryData* InstrItins;
38
39 public:
40 AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
41 StringRef CPU,
42 TargetOptions Options,
43 Reloc::Model RM, CodeModel::Model CM,
44 CodeGenOpt::Level OL);
45 ~AMDGPUTargetMachine();
46 virtual const AMDGPUFrameLowering* getFrameLowering() const {
47 return &FrameLowering;
48 }
49 virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
50 return &IntrinsicInfo;
51 }
52 virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
53 virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
54 virtual const AMDGPURegisterInfo *getRegisterInfo() const {
55 return &InstrInfo->getRegisterInfo();
56 }
57 virtual AMDGPUTargetLowering * getTargetLowering() const {
58 return TLInfo;
59 }
60 virtual const InstrItineraryData* getInstrItineraryData() const {
61 return InstrItins;
62 }
63 virtual const DataLayout* getDataLayout() const { return &Layout; }
64 virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
65 };
66
67 } // End namespace llvm
68
69 #endif // AMDGPU_TARGET_MACHINE_H
0 //===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// This file contains the entry points for global functions defined in the LLVM
10 /// AMDGPU back-end.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef AMDIL_H
15 #define AMDIL_H
16
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/Target/TargetMachine.h"
19
20 #define ARENA_SEGMENT_RESERVED_UAVS 12
21 #define DEFAULT_ARENA_UAV_ID 8
22 #define DEFAULT_RAW_UAV_ID 7
23 #define GLOBAL_RETURN_RAW_UAV_ID 11
24 #define HW_MAX_NUM_CB 8
25 #define MAX_NUM_UNIQUE_UAVS 8
26 #define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
27 #define OPENCL_MAX_READ_IMAGES 128
28 #define OPENCL_MAX_WRITE_IMAGES 8
29 #define OPENCL_MAX_SAMPLERS 16
30
31 // The next three values can never be zero, as zero is the ID that is
32 // used to assert against.
33 #define DEFAULT_LDS_ID 1
34 #define DEFAULT_GDS_ID 1
35 #define DEFAULT_SCRATCH_ID 1
36 #define DEFAULT_VEC_SLOTS 8
37
38 #define OCL_DEVICE_RV710 0x0001
39 #define OCL_DEVICE_RV730 0x0002
40 #define OCL_DEVICE_RV770 0x0004
41 #define OCL_DEVICE_CEDAR 0x0008
42 #define OCL_DEVICE_REDWOOD 0x0010
43 #define OCL_DEVICE_JUNIPER 0x0020
44 #define OCL_DEVICE_CYPRESS 0x0040
45 #define OCL_DEVICE_CAICOS 0x0080
46 #define OCL_DEVICE_TURKS 0x0100
47 #define OCL_DEVICE_BARTS 0x0200
48 #define OCL_DEVICE_CAYMAN 0x0400
49 #define OCL_DEVICE_ALL 0x3FFF
50
51 /// The number of function IDs that are reserved for
52 /// internal compiler usage.
53 const unsigned int RESERVED_FUNCS = 1024;
54
55 namespace llvm {
56 class AMDGPUInstrPrinter;
57 class FunctionPass;
58 class MCAsmInfo;
59 class raw_ostream;
60 class Target;
61 class TargetMachine;
62
63 // Instruction selection passes.
64 FunctionPass*
65 createAMDGPUISelDag(TargetMachine &TM);
66 FunctionPass*
67 createAMDGPUPeepholeOpt(TargetMachine &TM);
68
69 // Pre emit passes.
70 FunctionPass*
71 createAMDGPUCFGPreparationPass(TargetMachine &TM);
72 FunctionPass*
73 createAMDGPUCFGStructurizerPass(TargetMachine &TM);
74
75 extern Target TheAMDGPUTarget;
76 } // end namespace llvm;
77
78 // Include device information enumerations
79 #include "AMDILDeviceInfo.h"
80
81 namespace llvm {
82 /// OpenCL uses address spaces to differentiate between
83 /// various memory regions on the hardware. On the CPU
84 /// all of the address spaces point to the same memory;
85 /// on the GPU, however, each address space points to
86 /// a separate piece of memory that is distinct from other
87 /// memory locations.
88 namespace AMDGPUAS {
89 enum AddressSpaces {
90 PRIVATE_ADDRESS = 0, ///< Address space for private memory.
91 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
92 CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
93 LOCAL_ADDRESS = 3, ///< Address space for local memory.
94 REGION_ADDRESS = 4, ///< Address space for region memory.
95 ADDRESS_NONE = 5, ///< Address space for unknown memory.
96 PARAM_D_ADDRESS = 6, ///< Address space for directly addressable parameter memory (CONST0)
97 PARAM_I_ADDRESS = 7, ///< Address space for indirectly addressable parameter memory (VTX1)
98 USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
99 LAST_ADDRESS = 9
100 };
101
102 } // namespace AMDGPUAS
103
104 } // end namespace llvm
105 #endif // AMDIL_H
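
The AMDGPUAS numbering above is what pointer types carry through the backend; lowering branches on it to pick a hardware resource. A standalone sketch of such a dispatch (toy function; the resource descriptions follow the comments above and common AMD terminology):

#include <iostream>

namespace ToyAS {
enum AddressSpaces {
  PRIVATE_ADDRESS = 0, GLOBAL_ADDRESS = 1, CONSTANT_ADDRESS = 2,
  LOCAL_ADDRESS = 3, REGION_ADDRESS = 4, ADDRESS_NONE = 5
};
}

// Map an address-space ID to the kind of memory it selects.
const char *memoryKind(unsigned AS) {
  switch (AS) {
  case ToyAS::PRIVATE_ADDRESS:  return "per-thread scratch";
  case ToyAS::GLOBAL_ADDRESS:   return "off-chip global memory";
  case ToyAS::CONSTANT_ADDRESS: return "constant buffer";
  case ToyAS::LOCAL_ADDRESS:    return "on-chip local (LDS)";
  case ToyAS::REGION_ADDRESS:   return "region (GDS)";
  default:                      return "unknown";
  }
}

int main() {
  std::cout << memoryKind(ToyAS::LOCAL_ADDRESS) << "\n"; // on-chip local (LDS)
  return 0;
}
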
0 //===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 // \file
8 //==-----------------------------------------------------------------------===//
9 #include "AMDIL7XXDevice.h"
10 #include "AMDGPUSubtarget.h"
11 #include "AMDILDevice.h"
12
13 using namespace llvm;
14
15 AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
16 setCaps();
17 std::string name = mSTM->getDeviceName();
18 if (name == "rv710") {
19 DeviceFlag = OCL_DEVICE_RV710;
20 } else if (name == "rv730") {
21 DeviceFlag = OCL_DEVICE_RV730;
22 } else {
23 DeviceFlag = OCL_DEVICE_RV770;
24 }
25 }
26
27 AMDGPU7XXDevice::~AMDGPU7XXDevice() {
28 }
29
30 void AMDGPU7XXDevice::setCaps() {
31 mSWBits.set(AMDGPUDeviceInfo::LocalMem);
32 }
33
34 size_t AMDGPU7XXDevice::getMaxLDSSize() const {
35 if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
36 return MAX_LDS_SIZE_700;
37 }
38 return 0;
39 }
40
41 size_t AMDGPU7XXDevice::getWavefrontSize() const {
42 return AMDGPUDevice::HalfWavefrontSize;
43 }
44
45 uint32_t AMDGPU7XXDevice::getGeneration() const {
46 return AMDGPUDeviceInfo::HD4XXX;
47 }
48
49 uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
50 switch (DeviceID) {
51 default:
52 assert(0 && "ID type passed in is unknown!");
53 break;
54 case GLOBAL_ID:
55 case CONSTANT_ID:
56 case RAW_UAV_ID:
57 case ARENA_UAV_ID:
58 break;
59 case LDS_ID:
60 if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
61 return DEFAULT_LDS_ID;
62 }
63 break;
64 case SCRATCH_ID:
65 if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
66 return DEFAULT_SCRATCH_ID;
67 }
68 break;
69 case GDS_ID:
70 assert(0 && "GDS UAV ID is not supported on this chip");
71 if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
72 return DEFAULT_GDS_ID;
73 }
74 break;
75 };
76
77 return 0;
78 }
79
80 uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
81 return 1;
82 }
83
84 AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
85 setCaps();
86 }
87
88 AMDGPU770Device::~AMDGPU770Device() {
89 }
90
91 void AMDGPU770Device::setCaps() {
92 if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
93 mSWBits.set(AMDGPUDeviceInfo::FMA);
94 mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
95 }
96 mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
97 mHWBits.reset(AMDGPUDeviceInfo::LongOps);
98 mSWBits.set(AMDGPUDeviceInfo::LongOps);
99 mSWBits.set(AMDGPUDeviceInfo::LocalMem);
100 }
101
102 size_t AMDGPU770Device::getWavefrontSize() const {
103 return AMDGPUDevice::WavefrontSize;
104 }
105
106 AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
107 }
108
109 AMDGPU710Device::~AMDGPU710Device() {
110 }
111
112 size_t AMDGPU710Device::getWavefrontSize() const {
113 return AMDGPUDevice::QuarterWavefrontSize;
114 }
0 //==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8 /// \file
9 /// \brief Interface for the subtarget data classes.
10 ///
11 /// This file will define the interface that each generation needs to
12 /// implement in order to correctly answer queries on the capabilities of the
13 /// specific hardware.
14 //===----------------------------------------------------------------------===//
15 #ifndef AMDIL7XXDEVICEIMPL_H
16 #define AMDIL7XXDEVICEIMPL_H
17 #include "AMDILDevice.h"
18
19 namespace llvm {
20 class AMDGPUSubtarget;
21
22 //===----------------------------------------------------------------------===//
23 // 7XX generation of devices and their respective sub classes
24 //===----------------------------------------------------------------------===//
25
26 /// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
27 ///
28 /// All 7XX devices are derived from this class. The AMDGPU7XX device will only
29 /// support the minimal features that are required to be considered OpenCL 1.0
30 /// compliant and nothing more.
31 class AMDGPU7XXDevice : public AMDGPUDevice {
32 public:
33 AMDGPU7XXDevice(AMDGPUSubtarget *ST);
34 virtual ~AMDGPU7XXDevice();
35 virtual size_t getMaxLDSSize() const;
36 virtual size_t getWavefrontSize() const;
37 virtual uint32_t getGeneration() const;
38 virtual uint32_t getResourceID(uint32_t DeviceID) const;
39 virtual uint32_t getMaxNumUAVs() const;
40
41 protected:
42 virtual void setCaps();
43 };
44
45 /// \brief The AMDGPU770Device class represents the RV770 chip and its
46 /// derivative cards.
47 ///
48 /// The difference between this device and the base class is that this device
49 /// adds support for double precision and has a larger wavefront size.
50 class AMDGPU770Device : public AMDGPU7XXDevice {
51 public:
52 AMDGPU770Device(AMDGPUSubtarget *ST);
53 virtual ~AMDGPU770Device();
54 virtual size_t getWavefrontSize() const;
55 private:
56 virtual void setCaps();
57 };
58
59 /// \brief The AMDGPU710Device class derives from the 7XX base class.
60 ///
61 /// This class is a smaller derivative, so we need to overload some of the
62 /// functions in order to correctly specify this information.
63 class AMDGPU710Device : public AMDGPU7XXDevice {
64 public:
65 AMDGPU710Device(AMDGPUSubtarget *ST);
66 virtual ~AMDGPU710Device();
67 virtual size_t getWavefrontSize() const;
68 };
69
70 } // namespace llvm
71 #endif // AMDIL7XXDEVICEIMPL_H
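
The hierarchy above lets each chip override only what differs from its generation's defaults. A standalone sketch, assuming wavefront sizes of 64/32/16 for the full, half, and quarter cases named in the .cpp file:

#include <cstddef>
#include <iostream>

struct Device7XX {
  virtual ~Device7XX() {}
  // Generation-wide default: the half wavefront used by most 7XX parts.
  virtual size_t getWavefrontSize() const { return 32; }
};

struct Device770 : Device7XX {
  // RV770 runs full 64-wide wavefronts.
  virtual size_t getWavefrontSize() const { return 64; }
};

struct Device710 : Device7XX {
  // RV710 runs quarter (16-wide) wavefronts.
  virtual size_t getWavefrontSize() const { return 16; }
};

int main() {
  Device770 D;
  const Device7XX &Dev = D; // queried through the base interface, as in the backend
  std::cout << Dev.getWavefrontSize() << "\n"; // 64
  return 0;
}
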
0 //===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 // Target-independent interfaces which we are implementing
9 //===----------------------------------------------------------------------===//
10
11 include "llvm/Target/Target.td"
12
13 // Dummy Instruction itineraries for pseudo instructions
14 def ALU_NULL : FuncUnit;
15 def NullALU : InstrItinClass;
16
17 //===----------------------------------------------------------------------===//
18 // AMDIL Subtarget features.
19 //===----------------------------------------------------------------------===//
20 def FeatureFP64 : SubtargetFeature<"fp64",
21 "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
22 "true",
23 "Enable 64bit double precision operations">;
24 def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
25 "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
26 "true",
27 "Enable byte addressable stores">;
28 def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
29 "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
30 "true",
31 "Enable duplicate barrier detection(HD5XXX or later).">;
32 def FeatureImages : SubtargetFeature<"images",
33 "CapsOverride[AMDGPUDeviceInfo::Images]",
34 "true",
35 "Enable image functions">;
36 def FeatureMultiUAV : SubtargetFeature<"multi_uav",
37 "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
38 "true",
39 "Generate multiple UAV code(HD5XXX family or later)">;
40 def FeatureMacroDB : SubtargetFeature<"macrodb",
41 "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
42 "true",
43 "Use internal macrodb, instead of macrodb in driver">;
44 def FeatureNoAlias : SubtargetFeature<"noalias",
45 "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
46 "true",
47 "assert that all kernel argument pointers are not aliased">;
48 def FeatureNoInline : SubtargetFeature<"no-inline",
49 "CapsOverride[AMDGPUDeviceInfo::NoInline]",
50 "true",
51 "specify whether to not inline functions">;
52
53 def Feature64BitPtr : SubtargetFeature<"64BitPtr",
54 "Is64bit",
55 "false",
56 "Specify if 64bit addressing should be used.">;
57
58 def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
59 "Is32on64bit",
60 "false",
61 "Specify if 64bit sized pointers with 32bit addressing should be used.">;
62 def FeatureDebug : SubtargetFeature<"debug",
63 "CapsOverride[AMDGPUDeviceInfo::Debug]",
64 "true",
65 "Debug mode is enabled, so disable hardware accelerated address spaces.">;
66 def FeatureDumpCode : SubtargetFeature <"DumpCode",
67 "DumpCode",
68 "true",
69 "Dump MachineInstrs in the CodeEmitter">;
70
71 def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
72 "R600ALUInst",
73 "false",
74 "Older version of ALU instructions encoding.">;
75
76
77 //===----------------------------------------------------------------------===//
78 // Register File, Calling Conv, Instruction Descriptions
79 //===----------------------------------------------------------------------===//
80
81
82 include "AMDILRegisterInfo.td"
83 include "AMDILInstrInfo.td"
84
0 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 /// \file
8 //==-----------------------------------------------------------------------===//
9
10 #define DEBUGME 0
11 #define DEBUG_TYPE "structcfg"
12
13 #include "AMDGPUInstrInfo.h"
14 #include "AMDIL.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/DominatorInternals.h"
19 #include "llvm/Analysis/Dominators.h"
20 #include "llvm/CodeGen/MachinePostDominators.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/Target/TargetInstrInfo.h"
30
31 using namespace llvm;
32
33 // TODO: move-begin.
34
35 //===----------------------------------------------------------------------===//
36 //
37 // Statistics for CFGStructurizer.
38 //
39 //===----------------------------------------------------------------------===//
40
41 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
42 "matched");
43 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
44 "matched");
45 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
46 "pattern matched");
47 STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
48 "pattern matched");
49 STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
50 "matched");
51 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
52 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
53
54 //===----------------------------------------------------------------------===//
55 //
56 // Miscellaneous utility for CFGStructurizer.
57 //
58 //===----------------------------------------------------------------------===//
59 namespace llvmCFGStruct {
60 #define SHOWNEWINSTR(i) \
61 if (DEBUGME) errs() << "New instr: " << *i << "\n"
62
63 #define SHOWNEWBLK(b, msg) \
64 if (DEBUGME) { \
65 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
66 errs() << "\n"; \
67 }
68
69 #define SHOWBLK_DETAIL(b, msg) \
70 if (DEBUGME) { \
71 if (b) { \
72 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
73 b->print(errs()); \
74 errs() << "\n"; \
75 } \
76 }
77
78 #define INVALIDSCCNUM -1
79 #define INVALIDREGNUM 0
80
81 template<class LoopinfoT>
82 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
83 for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
84 iterEnd = LoopInfo.end();
85 iter != iterEnd; ++iter) {
86 (*iter)->print(OS, 0);
87 }
88 }
89
90 template<class NodeT>
91 void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
92 size_t sz = Src.size();
93 for (size_t i = 0; i < sz/2; ++i) {
94 NodeT *t = Src[i];
95 Src[i] = Src[sz - i - 1];
96 Src[sz - i - 1] = t;
97 }
98 }
99
100 } //end namespace llvmCFGStruct
101
102 //===----------------------------------------------------------------------===//
103 //
104 // supporting data structure for CFGStructurizer
105 //
106 //===----------------------------------------------------------------------===//
107
108 namespace llvmCFGStruct {
109 template<class PassT>
110 struct CFGStructTraits {
111 };
112
113 template<class InstrT>
114 class BlockInformation {
115 public:
116 bool isRetired;
117 int sccNum;
118 //SmallVector<InstrT *, DEFAULT_VEC_SLOTS> succInstr;
119 //Instructions defining the corresponding successor.
120 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
121 };
122
123 template<class BlockT, class InstrT, class RegiT>
124 class LandInformation {
125 public:
126 BlockT *landBlk;
127 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
128 //WHILELOOP(thisloop) init before entering
129 //thisloop.
130 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
131 //WHILELOOP(thisloop) init after entering
132 //thisloop.
133 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
134 //land block, branch cond on this reg.
135 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
136 //endif" after ENDLOOP(thisloop) break
137 //outerLoopOf(thisLoop).
138 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
139 //endif" after ENDLOOP(thisloop) continue on
140 //outerLoopOf(thisLoop).
141 LandInformation() : landBlk(NULL) {}
142 };
143
144 } //end of namespace llvmCFGStruct
145
146 //===----------------------------------------------------------------------===//
147 //
148 // CFGStructurizer
149 //
150 //===----------------------------------------------------------------------===//
151
152 namespace llvmCFGStruct {
153 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
154 template<class PassT>
155 class CFGStructurizer {
156 public:
157 typedef enum {
158 Not_SinglePath = 0,
159 SinglePath_InPath = 1,
160 SinglePath_NotInPath = 2
161 } PathToKind;
162
163 public:
164 typedef typename PassT::InstructionType InstrT;
165 typedef typename PassT::FunctionType FuncT;
166 typedef typename PassT::DominatortreeType DomTreeT;
167 typedef typename PassT::PostDominatortreeType PostDomTreeT;
168 typedef typename PassT::DomTreeNodeType DomTreeNodeT;
169 typedef typename PassT::LoopinfoType LoopInfoT;
170
171 typedef GraphTraits<FuncT *> FuncGTraits;
172 //typedef FuncGTraits::nodes_iterator BlockIterator;
173 typedef typename FuncT::iterator BlockIterator;
174
175 typedef typename FuncGTraits::NodeType BlockT;
176 typedef GraphTraits<BlockT *> BlockGTraits;
177 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
178 //typedef BlockGTraits::succ_iterator InstructionIterator;
179 typedef typename BlockT::iterator InstrIterator;
180
181 typedef CFGStructTraits<PassT> CFGTraits;
182 typedef BlockInformation<InstrT> BlockInfo;
183 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
184
185 typedef int RegiT;
186 typedef typename PassT::LoopType LoopT;
187 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
188 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
189 //landing info for loop break
190 typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
191
192 public:
193 CFGStructurizer();
194 ~CFGStructurizer();
195
196 /// Perform the CFG structurization
197 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
198
199 /// Perform the CFG preparation
200 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
201
202 private:
203 void reversePredicateSetter(typename BlockT::iterator);
204 void orderBlocks();
205 void printOrderedBlocks(llvm::raw_ostream &OS);
206 int patternMatch(BlockT *CurBlock);
207 int patternMatchGroup(BlockT *CurBlock);
208
209 int serialPatternMatch(BlockT *CurBlock);
210 int ifPatternMatch(BlockT *CurBlock);
211 int switchPatternMatch(BlockT *CurBlock);
212 int loopendPatternMatch(BlockT *CurBlock);
213 int loopPatternMatch(BlockT *CurBlock);
214
215 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
216 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
217 //int loopWithoutBreak(BlockT *);
218
219 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
220 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
221 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
222 BlockT *ContBlock, LoopT *contLoop);
223 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
224 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
225 BlockT *FalseBlock);
226 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
227 BlockT *FalseBlock);
228 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
229 BlockT *FalseBlock, BlockT **LandBlockPtr);
230 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
231 BlockT *FalseBlock, BlockT *LandBlock,
232 bool Detail = false);
233 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
234 bool AllowSideEntry = true);
235 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
236 bool AllowSideEntry = true);
237 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
238 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
239
240 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
241 BlockT *TrueBlock, BlockT *FalseBlock,
242 BlockT *LandBlock);
243 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
244 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
245 BlockT *ExitLandBlock, RegiT SetReg);
246 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
247 RegiT SetReg);
248 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
249 std::set<BlockT *> &ExitBlockSet,
250 BlockT *ExitLandBlk);
251 BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
252 BlockTSmallerVector &ExitingBlocks,
253 BlockTSmallerVector &ExitBlocks);
254 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
255 void removeUnconditionalBranch(BlockT *SrcBlock);
256 void removeRedundantConditionalBranch(BlockT *SrcBlock);
257 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
258
259 void removeSuccessor(BlockT *SrcBlock);
260 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
261 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
262
263 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
264 InstrIterator InsertPos);
265
266 void recordSccnum(BlockT *SrcBlock, int SCCNum);
267 int getSCCNum(BlockT *srcBlk);
268
269 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
270 bool isRetiredBlock(BlockT *SrcBlock);
271 bool isActiveLoophead(BlockT *CurBlock);
272 bool needMigrateBlock(BlockT *Block);
273
274 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
275 BlockTSmallerVector &exitBlocks,
276 std::set<BlockT *> &ExitBlockSet);
277 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
278 BlockT *getLoopLandBlock(LoopT *LoopRep);
279 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
280
281 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
282 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
283 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
284 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
285 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
286
287 bool hasBackEdge(BlockT *curBlock);
288 unsigned getLoopDepth (LoopT *LoopRep);
289 int countActiveBlock(
290 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
291 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
292 BlockT *findNearestCommonPostDom(std::set<BlockT *> &);
293 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
294
295 private:
296 DomTreeT *domTree;
297 PostDomTreeT *postDomTree;
298 LoopInfoT *loopInfo;
299 PassT *passRep;
300 FuncT *funcRep;
301
302 BlockInfoMap blockInfoMap;
303 LoopLandInfoMap loopLandInfoMap;
304 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
305 const AMDGPURegisterInfo *TRI;
306
307 }; //template class CFGStructurizer
308
309 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
310 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
311 }
312
313 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
314 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
315 E = blockInfoMap.end(); I != E; ++I) {
316 delete I->second;
317 }
318 }
319
320 template<class PassT>
321 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
322 const AMDGPURegisterInfo * tri) {
323 passRep = &pass;
324 funcRep = &func;
325 TRI = tri;
326
327 bool changed = false;
328
329 //FIXME: if not reducible flow graph, make it so ???
330
331 if (DEBUGME) {
332 errs() << "AMDGPUCFGStructurizer::prepare\n";
333 }
334
335 loopInfo = CFGTraits::getLoopInfo(pass);
336 if (DEBUGME) {
337 errs() << "LoopInfo:\n";
338 PrintLoopinfo(*loopInfo, errs());
339 }
340
341 orderBlocks();
342 if (DEBUGME) {
343 errs() << "Ordered blocks:\n";
344 printOrderedBlocks(errs());
345 }
346
347 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
348
349 for (typename LoopInfoT::iterator iter = loopInfo->begin(),
350 iterEnd = loopInfo->end();
351 iter != iterEnd; ++iter) {
352 LoopT* loopRep = (*iter);
353 BlockTSmallerVector exitingBlks;
354 loopRep->getExitingBlocks(exitingBlks);
355
356 if (exitingBlks.size() == 0) {
357 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
358 if (dummyExitBlk != NULL)
359 retBlks.push_back(dummyExitBlk);
360 }
361 }
362
363 // Remove unconditional branch instr.
364 // Add dummy exit block iff there are multiple returns.
365
366 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
367 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
368 iterBlk != iterEndBlk;
369 ++iterBlk) {
370 BlockT *curBlk = *iterBlk;
371 removeUnconditionalBranch(curBlk);
372 removeRedundantConditionalBranch(curBlk);
373 if (CFGTraits::isReturnBlock(curBlk)) {
374 retBlks.push_back(curBlk);
375 }
376 assert(curBlk->succ_size() <= 2);
377 } //for
378
379 if (retBlks.size() >= 2) {
380 addDummyExitBlock(retBlks);
381 changed = true;
382 }
383
384 return changed;
385 } //CFGStructurizer::prepare
386
387 template<class PassT>
388 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
389 const AMDGPURegisterInfo * tri) {
390 passRep = &pass;
391 funcRep = &func;
392 TRI = tri;
393
394 //Assume reducible CFG...
395 if (DEBUGME) {
396 errs() << "AMDGPUCFGStructurizer::run\n";
397 func.viewCFG();
398 }
399
400 domTree = CFGTraits::getDominatorTree(pass);
401 if (DEBUGME) {
402 domTree->print(errs(), (const llvm::Module*)0);
403 }
404
405 postDomTree = CFGTraits::getPostDominatorTree(pass);
406 if (DEBUGME) {
407 postDomTree->print(errs());
408 }
409
410 loopInfo = CFGTraits::getLoopInfo(pass);
411 if (DEBUGME) {
412 errs() << "LoopInfo:\n";
413 PrintLoopinfo(*loopInfo, errs());
414 }
415
416 orderBlocks();
417 #ifdef STRESSTEST
418 //Use the worse block ordering to test the algorithm.
419 ReverseVector(orderedBlks);
420 #endif
421
422 if (DEBUGME) {
423 errs() << "Ordered blocks:\n";
424 printOrderedBlocks(errs());
425 }
426 int numIter = 0;
427 bool finish = false;
428 BlockT *curBlk;
429 bool makeProgress = false;
430 int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
431 orderedBlks.end());
432
433 do {
434 ++numIter;
435 if (DEBUGME) {
436 errs() << "numIter = " << numIter
437 << ", numRemaintedBlk = " << numRemainedBlk << "\n";
438 }
439
440 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
441 iterBlk = orderedBlks.begin();
442 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
443 iterBlkEnd = orderedBlks.end();
444
445 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
446 sccBeginIter = iterBlk;
447 BlockT *sccBeginBlk = NULL;
448 int sccNumBlk = 0; // The number of active blocks, init to a
449 // maximum possible number.
450 int sccNumIter; // Number of iterations in this SCC.
451
452 while (iterBlk != iterBlkEnd) {
453 curBlk = *iterBlk;
454
455 if (sccBeginBlk == NULL) {
456 sccBeginIter = iterBlk;
457 sccBeginBlk = curBlk;
458 sccNumIter = 0;
459 sccNumBlk = numRemainedBlk; // Init to maximum possible number.
460 if (DEBUGME) {
461 errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
462 errs() << "\n";
463 }
464 }
465
466 if (!isRetiredBlock(curBlk)) {
467 patternMatch(curBlk);
468 }
469
470 ++iterBlk;
471
472 bool contNextScc = true;
473 if (iterBlk == iterBlkEnd
474 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
475 // Just finish one scc.
476 ++sccNumIter;
477 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
478 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
479 if (DEBUGME) {
480 errs() << "Can't reduce SCC " << getSCCNum(curBlk)
481 << ", sccNumIter = " << sccNumIter;
483 errs() << ", doesn't make any progress\n";
483 }
484 contNextScc = true;
485 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
486 sccNumBlk = sccRemainedNumBlk;
487 iterBlk = sccBeginIter;
488 contNextScc = false;
489 if (DEBUGME) {
490 errs() << "repeat processing SCC" << getSCCNum(curBlk)
491 << "sccNumIter = " << sccNumIter << "\n";
492 func.viewCFG();
493 }
494 } else {
495 // Finish the current scc.
496 contNextScc = true;
497 }
498 } else {
499 // Continue on next component in the current scc.
500 contNextScc = false;
501 }
502
503 if (contNextScc) {
504 sccBeginBlk = NULL;
505 }
506 } //while, "one iteration" over the function.
507
508 BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
509 if (entryBlk->succ_size() == 0) {
510 finish = true;
511 if (DEBUGME) {
512 errs() << "Reduce to one block\n";
513 }
514 } else {
515 int newnumRemainedBlk
516 = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
517 // consider cloned blocks ??
518 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
519 makeProgress = true;
520 numRemainedBlk = newnumRemainedBlk;
521 } else {
522 makeProgress = false;
523 if (DEBUGME) {
524 errs() << "No progress\n";
525 }
526 }
527 }
528 } while (!finish && makeProgress);
529
530 // Misc wrap up to maintain the consistency of the Function representation.
531 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
532
533 // Detach retired Block, release memory.
534 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
535 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
536 if ((*iterMap).second && (*iterMap).second->isRetired) {
537 assert(((*iterMap).first)->getNumber() != -1);
538 if (DEBUGME) {
539 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
540 }
541 (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
542 }
543 delete (*iterMap).second;
544 }
545 blockInfoMap.clear();
546
547 // clear loopLandInfoMap
548 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
549 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
550 delete (*iterMap).second;
551 }
552 loopLandInfoMap.clear();
553
554 if (DEBUGME) {
555 func.viewCFG();
556 }
557
558 if (!finish) {
559 assert(!"IRREDUCIBLE_CF");
560 }
561
562 return true;
563 } //CFGStructurizer::run
564
565 /// Print the ordered Blocks.
566 ///
567 template<class PassT>
568 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
569 size_t i = 0;
570 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
571 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
572 iterBlk != iterBlkEnd;
573 ++iterBlk, ++i) {
574 os << "BB" << (*iterBlk)->getNumber();
575 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
576 if (i != 0 && i % 10 == 0) {
577 os << "\n";
578 } else {
579 os << " ";
580 }
581 }
582 } //printOrderedBlocks
583
584 /// Compute the reversed DFS post order of Blocks
585 ///
586 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
587 int sccNum = 0;
588 BlockT *bb;
589 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
590 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
591 std::vector<BlockT *> &sccNext = *sccIter;
592 for (typename std::vector<BlockT *>::const_iterator
593 blockIter = sccNext.begin(), blockEnd = sccNext.end();
594 blockIter != blockEnd; ++blockIter) {
595 bb = *blockIter;
596 orderedBlks.push_back(bb);
597 recordSccnum(bb, sccNum);
598 }
599 }
600
601 //Walk through all the blocks in func to check for unreachable blocks.
602 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
603 blockEnd1 = FuncGTraits::nodes_end(funcRep);
604 blockIter1 != blockEnd1; ++blockIter1) {
605 BlockT *bb = &(*blockIter1);
606 sccNum = getSCCNum(bb);
607 if (sccNum == INVALIDSCCNUM) {
608 errs() << "unreachable block BB" << bb->getNumber() << "\n";
609 }
610 }
611 } //orderBlocks
612
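
orderBlocks() relies on LLVM's scc_iterator, which enumerates strongly connected components in reverse topological order: a loop is visited as one unit, successors before predecessors. A standalone sketch of the same enumeration using Tarjan's algorithm on a toy CFG (illustrative only, not the pass's code):

#include <algorithm>
#include <iostream>
#include <stack>
#include <vector>

struct SCCFinder {
  const std::vector<std::vector<int> > &Succ; // block index -> successor indices
  std::vector<int> Index, Low;
  std::vector<bool> OnStack;
  std::stack<int> Stk;
  std::vector<std::vector<int> > SCCs;        // filled leaves-first
  int Counter;

  SCCFinder(const std::vector<std::vector<int> > &S)
    : Succ(S), Index(S.size(), -1), Low(S.size(), 0),
      OnStack(S.size(), false), Counter(0) {}

  void visit(int V) {
    Index[V] = Low[V] = Counter++;
    Stk.push(V);
    OnStack[V] = true;
    for (size_t i = 0; i < Succ[V].size(); ++i) {
      int W = Succ[V][i];
      if (Index[W] < 0) {               // tree edge: recurse first
        visit(W);
        Low[V] = std::min(Low[V], Low[W]);
      } else if (OnStack[W]) {          // back edge inside the current SCC
        Low[V] = std::min(Low[V], Index[W]);
      }
    }
    if (Low[V] == Index[V]) {           // V roots an SCC: pop it off the stack
      SCCs.push_back(std::vector<int>());
      int W;
      do {
        W = Stk.top(); Stk.pop(); OnStack[W] = false;
        SCCs.back().push_back(W);
      } while (W != V);
    }
  }
};

int main() {
  // Toy CFG: BB0 -> BB1 -> BB2, BB2 -> BB1 (loop back edge), BB2 -> BB3.
  std::vector<std::vector<int> > Succ(4);
  Succ[0].push_back(1);
  Succ[1].push_back(2);
  Succ[2].push_back(1);
  Succ[2].push_back(3);

  SCCFinder F(Succ);
  for (int V = 0; V < 4; ++V)
    if (F.Index[V] < 0)
      F.visit(V);

  // Prints the SCCs leaves-first: {BB3}, {BB2 BB1}, {BB0}.
  for (size_t i = 0; i < F.SCCs.size(); ++i) {
    std::cout << "SCC" << i << ":";
    for (size_t j = 0; j < F.SCCs[i].size(); ++j)
      std::cout << " BB" << F.SCCs[i][j];
    std::cout << "\n";
  }
  return 0;
}
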
613 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
614 int numMatch = 0;
615 int curMatch;
616
617 if (DEBUGME) {
618 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
619 }
620
621 while ((curMatch = patternMatchGroup(curBlk)) > 0) {
622 numMatch += curMatch;
623 }
624
625 if (DEBUGME) {
626 errs() << "End patternMatch BB" << curBlk->getNumber()
627 << ", numMatch = " << numMatch << "\n";
628 }
629
630 return numMatch;
631 } //patternMatch
632
633 template<class PassT>
634 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
635 int numMatch = 0;
636 numMatch += serialPatternMatch(curBlk);
637 numMatch += ifPatternMatch(curBlk);
638 numMatch += loopendPatternMatch(curBlk);
639 numMatch += loopPatternMatch(curBlk);
640 return numMatch;
641 }//patternMatchGroup
642
643 template<class PassT>
644 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
645 if (curBlk->succ_size() != 1) {
646 return 0;
647 }
648
649 BlockT *childBlk = *curBlk->succ_begin();
650 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
651 return 0;
652 }
653
654 mergeSerialBlock(curBlk, childBlk);
655 ++numSerialPatternMatch;
656 return 1;
657 } //serialPatternMatch
658
659 template<class PassT>
660 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
661 //two edges
662 if (curBlk->succ_size() != 2) {
663 return 0;
664 }
665
666 if (hasBackEdge(curBlk)) {
667 return 0;
668 }
669
670 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
671 if (branchInstr == NULL) {
672 return 0;
673 }
674
675 assert(CFGTraits::isCondBranch(branchInstr));
676
677 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
678 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
679 BlockT *landBlk;
680 int cloned = 0;
681
682 // TODO: Simplify
683 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
684 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
685 landBlk = *trueBlk->succ_begin();
686 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
687 landBlk = NULL;
688 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
689 landBlk = falseBlk;
690 falseBlk = NULL;
691 } else if (falseBlk->succ_size() == 1
692 && *falseBlk->succ_begin() == trueBlk) {
693 landBlk = trueBlk;
694 trueBlk = NULL;
695 } else if (falseBlk->succ_size() == 1
696 && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
697 landBlk = *falseBlk->succ_begin();
698 } else if (trueBlk->succ_size() == 1
699 && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
700 landBlk = *trueBlk->succ_begin();
701 } else {
702 return handleJumpintoIf(curBlk, trueBlk, falseBlk);
703 }
704
705 // improveSimpleJumpintoIf can handle the case where landBlk == NULL, but the
706 // new BB created for landBlk == NULL may introduce new challenges to the
707 // reduction process.
708 if (landBlk != NULL &&
709 ((trueBlk && trueBlk->pred_size() > 1)
710 || (falseBlk && falseBlk->pred_size() > 1))) {
711 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
712 }
713
714 if (trueBlk && trueBlk->pred_size() > 1) {
715 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
716 ++cloned;
717 }
718
719 if (falseBlk && falseBlk->pred_size() > 1) {
720 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
721 ++cloned;
722 }
723
724 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
725
726 ++numIfPatternMatch;
727
728 numClonedBlock += cloned;
729
730 return 1 + cloned;
731 } //ifPatternMatch
732
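
ifPatternMatch() above spends most of its lines classifying where the two successors rejoin. A standalone sketch of that classification on a toy CFG, mirroring the first cases of the if/else chain (diamond, exit, triangle, or irregular):

#include <iostream>

// Toy block: at most two successors, NULL when absent.
struct Block {
  Block *Succ0;
  Block *Succ1;
};

const char *classifyIf(const Block &True, const Block &False) {
  bool TrueSingle = True.Succ0 && !True.Succ1;
  bool FalseSingle = False.Succ0 && !False.Succ1;
  if (TrueSingle && FalseSingle && True.Succ0 == False.Succ0)
    return "diamond: land at the common successor";
  if (!True.Succ0 && !False.Succ0)
    return "no landing block needed: both sides exit";
  if (TrueSingle && True.Succ0 == &False)
    return "triangle: land at the false block";
  if (FalseSingle && False.Succ0 == &True)
    return "triangle: land at the true block";
  return "irregular: needs cloning or jump-into-if handling";
}

int main() {
  Block Land = { 0, 0 };
  Block T = { &Land, 0 };
  Block F = { &Land, 0 };
  std::cout << classifyIf(T, F) << "\n"; // diamond: land at the common successor
  return 0;
}
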
733 template<class PassT>
734 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
735 return 0;
736 } //switchPatternMatch
737
738 template<class PassT>
739 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
740 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
741 std::vector<LoopT *> nestedLoops;
742 while (loopRep) {
743 nestedLoops.push_back(loopRep);
744 loopRep = loopRep->getParentLoop();
745