llvm.org GIT mirror llvm / 23dc769
AMDGPU: Add core backend files for R600/SI codegen v6 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160270 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 7 years ago
114 changed file(s) with 28329 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef AMDGPU_H
10 #define AMDGPU_H
11
12 #include "AMDGPUTargetMachine.h"
13 #include "llvm/Support/TargetRegistry.h"
14 #include "llvm/Target/TargetMachine.h"
15
16 namespace llvm {
17
18 class FunctionPass;
19 class AMDGPUTargetMachine;
20
21 // R600 Passes
22 FunctionPass* createR600KernelParametersPass(const TargetData* TD);
23 FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
24
25 // SI Passes
26 FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
27 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
28
29 // Passes common to R600 and SI
30 FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
31
32 } // End namespace llvm
33
34 #endif // AMDGPU_H
//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//

// Include AMDIL TD files
include "AMDILBase.td"
include "AMDILVersion.td"

// Include AMDGPU TD files
include "R600Schedule.td"
include "SISchedule.td"
include "Processors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPUInstructions.td"
0 //===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass lowers AMDIL machine instructions to the appropriate hardware
10 // instructions.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "AMDGPUInstrInfo.h"
16 #include "llvm/CodeGen/MachineFunctionPass.h"
17
18 #include
19 using namespace llvm;
20
21 namespace {
22
23 class AMDGPUConvertToISAPass : public MachineFunctionPass {
24
25 private:
26 static char ID;
27 TargetMachine &TM;
28
29 public:
30 AMDGPUConvertToISAPass(TargetMachine &tm) :
31 MachineFunctionPass(ID), TM(tm) { }
32
33 virtual bool runOnMachineFunction(MachineFunction &MF);
34
35 virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
36
37 };
38
39 } // End anonymous namespace
40
41 char AMDGPUConvertToISAPass::ID = 0;
42
43 FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
44 return new AMDGPUConvertToISAPass(tm);
45 }
46
47 bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
48 {
49 const AMDGPUInstrInfo * TII =
50 static_cast(TM.getInstrInfo());
51
52 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
53 BB != BB_E; ++BB) {
54 MachineBasicBlock &MBB = *BB;
55 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
56 I != E; ++I) {
57 MachineInstr &MI = *I;
58 TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
59 }
60 }
61 return false;
62 }
0 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is the parent TargetLowering class for hardware code gen targets.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPUISelLowering.h"
14 #include "AMDILIntrinsicInfo.h"
15 #include "AMDGPUUtil.h"
16 #include "llvm/CodeGen/MachineRegisterInfo.h"
17
18 using namespace llvm;
19
20 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
21 AMDILTargetLowering(TM)
22 {
23 // We need to custom lower some of the intrinsics
24 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
25
26 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
27 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
28
29 // Library functions. These default to Expand, but we have instructions
30 // for them.
31 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
32 setOperationAction(ISD::FEXP2, MVT::f32, Legal);
33 setOperationAction(ISD::FRINT, MVT::f32, Legal);
34
35 setOperationAction(ISD::UDIV, MVT::i32, Expand);
36 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
37 setOperationAction(ISD::UREM, MVT::i32, Expand);
38 }
39
40 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
41 const
42 {
43 switch (Op.getOpcode()) {
44 default: return AMDILTargetLowering::LowerOperation(Op, DAG);
45 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
46 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
47 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
48 }
49 }
50
51 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
52 SelectionDAG &DAG) const
53 {
54 unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue();
55 DebugLoc DL = Op.getDebugLoc();
56 EVT VT = Op.getValueType();
57
58 switch (IntrinsicID) {
59 default: return Op;
60 case AMDGPUIntrinsic::AMDIL_abs:
61 return LowerIntrinsicIABS(Op, DAG);
62 case AMDGPUIntrinsic::AMDIL_exp:
63 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
64 case AMDGPUIntrinsic::AMDIL_fabs:
65 return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
66 case AMDGPUIntrinsic::AMDGPU_lrp:
67 return LowerIntrinsicLRP(Op, DAG);
68 case AMDGPUIntrinsic::AMDIL_fraction:
69 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
70 case AMDGPUIntrinsic::AMDIL_mad:
71 return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
72 Op.getOperand(2), Op.getOperand(3));
73 case AMDGPUIntrinsic::AMDIL_max:
74 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
75 Op.getOperand(2));
76 case AMDGPUIntrinsic::AMDGPU_imax:
77 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
78 Op.getOperand(2));
79 case AMDGPUIntrinsic::AMDGPU_umax:
80 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
81 Op.getOperand(2));
82 case AMDGPUIntrinsic::AMDIL_min:
83 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
84 Op.getOperand(2));
85 case AMDGPUIntrinsic::AMDGPU_imin:
86 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
87 Op.getOperand(2));
88 case AMDGPUIntrinsic::AMDGPU_umin:
89 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
90 Op.getOperand(2));
91 case AMDGPUIntrinsic::AMDIL_round_nearest:
92 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
93 case AMDGPUIntrinsic::AMDIL_round_posinf:
94 return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
95 }
96 }
97
98 ///IABS(a) = SMAX(sub(0, a), a)
99 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
100 SelectionDAG &DAG) const
101 {
102
103 DebugLoc DL = Op.getDebugLoc();
104 EVT VT = Op.getValueType();
105 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
106 Op.getOperand(1));
107
108 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
109 }
110
111 /// Linear Interpolation
112 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
113 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
114 SelectionDAG &DAG) const
115 {
116 DebugLoc DL = Op.getDebugLoc();
117 EVT VT = Op.getValueType();
118 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
119 DAG.getConstantFP(1.0f, MVT::f32),
120 Op.getOperand(1));
121 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
122 Op.getOperand(3));
123 return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
124 Op.getOperand(2),
125 OneSubAC);
126 }
127
128 SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op,
129 SelectionDAG &DAG) const
130 {
131 DebugLoc DL = Op.getDebugLoc();
132 EVT VT = Op.getValueType();
133
134 SDValue LHS = Op.getOperand(0);
135 SDValue RHS = Op.getOperand(1);
136 SDValue True = Op.getOperand(2);
137 SDValue False = Op.getOperand(3);
138 SDValue CC = Op.getOperand(4);
139 ISD::CondCode CCOpcode = cast(CC)->get();
140 SDValue Temp;
141
142 // LHS and RHS are guaranteed to be the same value type
143 EVT CompareVT = LHS.getValueType();
144
145 // We need all the operands of SELECT_CC to have the same value type, so if
146 // necessary we need to convert LHS and RHS to be the same type True and
147 // False. True and False are guaranteed to have the same type as this
148 // SELECT_CC node.
149
150 if (CompareVT != VT) {
151 ISD::NodeType ConversionOp = ISD::DELETED_NODE;
152 if (VT == MVT::f32 && CompareVT == MVT::i32) {
153 if (isUnsignedIntSetCC(CCOpcode)) {
154 ConversionOp = ISD::UINT_TO_FP;
155 } else {
156 ConversionOp = ISD::SINT_TO_FP;
157 }
158 } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
159 ConversionOp = ISD::FP_TO_SINT;
160 } else {
161 // I don't think there will be any other type pairings.
162 assert(!"Unhandled operand type parings in SELECT_CC");
163 }
164 // XXX Check the value of LHS and RHS and avoid creating sequences like
165 // (FTOI (ITOF))
166 LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
167 RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
168 }
169
170 // If True is a hardware TRUE value and False is a hardware FALSE value or
171 // vice-versa we can handle this with a native instruction (SET* instructions).
172 if ((isHWTrueValue(True) && isHWFalseValue(False))) {
173 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
174 }
175
176 // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
177 // we can handle this with a native instruction, but we need to swap true
178 // and false and change the conditional.
179 if (isHWTrueValue(False) && isHWFalseValue(True)) {
180 }
181
182 // XXX Check if we can lower this to a SELECT or if it is supported by a native
183 // operation. (The code below does this but we don't have the Instruction
184 // selection patterns to do this yet.
185 #if 0
186 if (isZero(LHS) || isZero(RHS)) {
187 SDValue Cond = (isZero(LHS) ? RHS : LHS);
188 bool SwapTF = false;
189 switch (CCOpcode) {
190 case ISD::SETOEQ:
191 case ISD::SETUEQ:
192 case ISD::SETEQ:
193 SwapTF = true;
194 // Fall through
195 case ISD::SETONE:
196 case ISD::SETUNE:
197 case ISD::SETNE:
198 // We can lower to select
199 if (SwapTF) {
200 Temp = True;
201 True = False;
202 False = Temp;
203 }
204 // CNDE
205 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
206 default:
207 // Supported by a native operation (CNDGE, CNDGT)
208 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
209 }
210 }
211 #endif
212
213 // If we make it this for it means we have no native instructions to handle
214 // this SELECT_CC, so we must lower it.
215 SDValue HWTrue, HWFalse;
216
217 if (VT == MVT::f32) {
218 HWTrue = DAG.getConstantFP(1.0f, VT);
219 HWFalse = DAG.getConstantFP(0.0f, VT);
220 } else if (VT == MVT::i32) {
221 HWTrue = DAG.getConstant(-1, VT);
222 HWFalse = DAG.getConstant(0, VT);
223 }
224 else {
225 assert(!"Unhandled value type in LowerSELECT_CC");
226 }
227
228 // Lower this unsupported SELECT_CC into a combination of two supported
229 // SELECT_CC operations.
230 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
231
232 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
233 }
234
235
236 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
237 SelectionDAG &DAG) const
238 {
239 DebugLoc DL = Op.getDebugLoc();
240 EVT VT = Op.getValueType();
241
242 SDValue Num = Op.getOperand(0);
243 SDValue Den = Op.getOperand(1);
244
245 SmallVector Results;
246
247 // RCP = URECIP(Den) = 2^32 / Den + e
248 // e is rounding error.
249 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
250
251 // RCP_LO = umulo(RCP, Den) */
252 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
253
254 // RCP_HI = mulhu (RCP, Den) */
255 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
256
257 // NEG_RCP_LO = -RCP_LO
258 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
259 RCP_LO);
260
261 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
262 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
263 NEG_RCP_LO, RCP_LO,
264 ISD::SETEQ);
265 // Calculate the rounding error from the URECIP instruction
266 // E = mulhu(ABS_RCP_LO, RCP)
267 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
268
269 // RCP_A_E = RCP + E
270 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
271
272 // RCP_S_E = RCP - E
273 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
274
275 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
276 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
277 RCP_A_E, RCP_S_E,
278 ISD::SETEQ);
279 // Quotient = mulhu(Tmp0, Num)
280 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
281
282 // Num_S_Remainder = Quotient * Den
283 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
284
285 // Remainder = Num - Num_S_Remainder
286 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
287
288 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
289 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
290 DAG.getConstant(-1, VT),
291 DAG.getConstant(0, VT),
292 ISD::SETGE);
293 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
294 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
295 DAG.getConstant(0, VT),
296 DAG.getConstant(-1, VT),
297 DAG.getConstant(0, VT),
298 ISD::SETGE);
299 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
300 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
301 Remainder_GE_Zero);
302
303 // Calculate Division result:
304
305 // Quotient_A_One = Quotient + 1
306 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
307 DAG.getConstant(1, VT));
308
309 // Quotient_S_One = Quotient - 1
310 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
311 DAG.getConstant(1, VT));
312
313 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
314 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
315 Quotient, Quotient_A_One, ISD::SETEQ);
316
317 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
318 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
319 Quotient_S_One, Div, ISD::SETEQ);
320
321 // Calculate Rem result:
322
323 // Remainder_S_Den = Remainder - Den
324 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
325
326 // Remainder_A_Den = Remainder + Den
327 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
328
329 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
330 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
331 Remainder, Remainder_S_Den, ISD::SETEQ);
332
333 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
334 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
335 Remainder_A_Den, Rem, ISD::SETEQ);
336
337 DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
338 DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
339
340 return Op;
341 }
342
343 //===----------------------------------------------------------------------===//
344 // Helper functions
345 //===----------------------------------------------------------------------===//
346
347 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
348 {
349 if (ConstantFPSDNode * CFP = dyn_cast(Op)) {
350 return CFP->isExactlyValue(1.0);
351 }
352 if (ConstantSDNode *C = dyn_cast(Op)) {
353 return C->isAllOnesValue();
354 }
355 return false;
356 }
357
358 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
359 {
360 if (ConstantFPSDNode * CFP = dyn_cast(Op)) {
361 return CFP->getValueAPF().isZero();
362 }
363 if (ConstantSDNode *C = dyn_cast(Op)) {
364 return C->isNullValue();
365 }
366 return false;
367 }
368
369 void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
370 MachineFunction * MF, MachineRegisterInfo & MRI,
371 const TargetInstrInfo * TII, unsigned reg) const
372 {
373 AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg());
374 }
375
376 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
377
378 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
379 {
380 switch (Opcode) {
381 default: return AMDILTargetLowering::getTargetNodeName(Opcode);
382
383 NODE_NAME_CASE(FRACT)
384 NODE_NAME_CASE(FMAX)
385 NODE_NAME_CASE(SMAX)
386 NODE_NAME_CASE(UMAX)
387 NODE_NAME_CASE(FMIN)
388 NODE_NAME_CASE(SMIN)
389 NODE_NAME_CASE(UMIN)
390 NODE_NAME_CASE(URECIP)
391 }
392 }
0 //===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the interface defintiion of the TargetLowering class
10 // that is common to all AMD GPUs.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef AMDGPUISELLOWERING_H
15 #define AMDGPUISELLOWERING_H
16
17 #include "AMDILISelLowering.h"
18
19 namespace llvm {
20
21 class AMDGPUTargetLowering : public AMDILTargetLowering
22 {
23 private:
24 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
25 SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
26 SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
27
28 protected:
29
30 /// addLiveIn - This functions adds reg to the live in list of the entry block
31 /// and emits a copy from reg to MI.getOperand(0).
32 ///
33 // Some registers are loaded with values before the program
34 /// begins to execute. The loading of these values is modeled with pseudo
35 /// instructions which are lowered using this function.
36 void addLiveIn(MachineInstr * MI, MachineFunction * MF,
37 MachineRegisterInfo & MRI, const TargetInstrInfo * TII,
38 unsigned reg) const;
39
40 bool isHWTrueValue(SDValue Op) const;
41 bool isHWFalseValue(SDValue Op) const;
42
43 public:
44 AMDGPUTargetLowering(TargetMachine &TM);
45
46 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
47 SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
48 SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
49 virtual const char* getTargetNodeName(unsigned Opcode) const;
50
51 };
52
53 namespace AMDGPUISD
54 {
55
56 enum
57 {
58 AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER,
59 BITALIGN,
60 FRACT,
61 FMAX,
62 SMAX,
63 UMAX,
64 FMIN,
65 SMIN,
66 UMIN,
67 URECIP,
68 LAST_AMDGPU_ISD_NUMBER
69 };
70
71
72 } // End namespace AMDGPUISD
73
74 } // End namespace llvm
75
76 #endif // AMDGPUISELLOWERING_H
0 //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation of the TargetInstrInfo class that is
10 // common to all AMD GPUs.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUInstrInfo.h"
15 #include "AMDGPURegisterInfo.h"
16 #include "AMDGPUTargetMachine.h"
17 #include "AMDIL.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
19
20 using namespace llvm;
21
22 AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm)
23 : AMDILInstrInfo(tm) { }
24
25 void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
26 DebugLoc DL) const
27 {
28 MachineRegisterInfo &MRI = MF.getRegInfo();
29 const AMDGPURegisterInfo & RI = getRegisterInfo();
30
31 for (unsigned i = 0; i < MI.getNumOperands(); i++) {
32 MachineOperand &MO = MI.getOperand(i);
33 // Convert dst regclass to one that is supported by the ISA
34 if (MO.isReg() && MO.isDef()) {
35 if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
36 const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
37 const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
38
39 assert(newRegClass);
40
41 MRI.setRegClass(MO.getReg(), newRegClass);
42 }
43 }
44 }
45 }
0 //===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the definition of a TargetInstrInfo class that is common
10 // to all AMD GPUs.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef AMDGPUINSTRUCTIONINFO_H_
15 #define AMDGPUINSTRUCTIONINFO_H_
16
17 #include "AMDGPURegisterInfo.h"
18 #include "AMDILInstrInfo.h"
19
20 #include
21
22 namespace llvm {
23
24 class AMDGPUTargetMachine;
25 class MachineFunction;
26 class MachineInstr;
27 class MachineInstrBuilder;
28
29 class AMDGPUInstrInfo : public AMDILInstrInfo {
30
31 public:
32 explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
33
34 virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
35
36 /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
37 /// MachineInstr
38 virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
39 DebugLoc DL) const;
40
41 };
42
43 } // End llvm namespace
44
45 #endif // AMDGPUINSTRINFO_H_
//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMDGPU DAG Profiles
//===----------------------------------------------------------------------===//

def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;

//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//

// out = ((a << 32) | b) >> c
//
// Can be used to optimize rotl:
// rotl(a, b) = bitalign(a, a, 32 - b)
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;

// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

// out = max(a, b) a and b are floats
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = max(a, b) a and b are unsigned ints
def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are floats
def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are signed ints
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are unsigned ints
def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// urecip - This operation is a helper for integer division, it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

// NOTE(review): the template parameter lists below were stripped by the
// paste that produced this file; they have been reconstructed from the
// bodies' usage (outs/ins/asm/pattern, rc, etc.) — confirm against the
// original commit.

class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern>
    : Instruction {
  field bits<16> AMDILOp = 0;
  field bits<3> Gen = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;
  let TSFlags{42-40} = Gen;
  let TSFlags{63-48} = AMDILOp;
}

class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
    : AMDGPUInst <outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;

}

// Frequently-used float constants, stored as their IEEE-754 bit patterns.
class Constants {
  int TWO_PI = 0x40c90fdb;
  int PI = 0x40490fdb;
  int TWO_PI_INV = 0x3e22f983;
}
def CONST : Constants;

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {

class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;

class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set rc:$dst, (fabs rc:$src0))]
>;

class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set rc:$dst, (fneg rc:$src0))]
>;

} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
                  RegisterClass rc> : Pat <
  (int_AMDGPU_pow rc:$src0, rc:$src1),
  (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type,
                       RegisterClass vec_class, int sub_idx,
                       SubRegIndex sub_reg> : Pat<
  (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
  (EXTRACT_SUBREG vec_class:$src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      RegisterClass elem_class, RegisterClass vec_class,
                      int sub_idx, SubRegIndex sub_reg> : Pat <

  (vec_type (vector_insert (vec_type vec_class:$vec),
                           (elem_type elem_class:$elem), sub_idx)),
  (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
>;

// Vector Build pattern
class Vector_Build <ValueType vecType, RegisterClass elemClass> : Pat <
  (IL_vbuild elemClass:$src),
  (INSERT_SUBREG (vecType (IMPLICIT_DEF)), elemClass:$src, sel_x)
>;

// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

include "R600Instructions.td"

include "SIInstrInfo.td"
0 //===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines intrinsics that are used by all hw codegen targets.
10 //
11 //===----------------------------------------------------------------------===//
12
13 let TargetPrefix = "AMDGPU", isTarget = 1 in {
14
15 def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
16 def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
17 def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
18 def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], [IntrNoMem]>;
19 def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
20
21 def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
22 def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
23 def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
24 def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
25 def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
26 def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
27 def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
28 def int_AMDGPU_kilp : Intrinsic<[], [], []>;
29 def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
30 def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
31 def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
32 def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
33 def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
34 def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
35 def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
36 def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
37 def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
38 def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
39 def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
40 def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
41 def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
42 def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
43 def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
44 def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
45 def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
46 def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
47 def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
48 def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
49 def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
50 def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
51 def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
52 def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
53 def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
54 def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
55 def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
56 }
57
// Intrinsics using the TGSI prefix rather than AMDGPU.
let TargetPrefix = "TGSI", isTarget = 1 in {

  // Presumably computes the Z component of the TGSI LIT result from the
  // three float operands — TODO confirm against the R600 lowering.
  def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
}
62
63 include "SIIntrinsics.td"
0 //===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Parent TargetRegisterInfo class common to all hw codegen targets.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPURegisterInfo.h"
14 #include "AMDGPUTargetMachine.h"
15
16 using namespace llvm;
17
// Shared register-info construction for the R600 and SI backends: forwards
// to AMDILRegisterInfo and caches the target machine and instruction info
// references for use by the concrete subclasses.
AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm,
    const TargetInstrInfo &tii)
: AMDILRegisterInfo(tm, tii),
  TM(tm),
  TII(tii)
  { }
0 //===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the TargetRegisterInfo interface that is implemented
10 // by all hw codegen targets.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef AMDGPUREGISTERINFO_H_
15 #define AMDGPUREGISTERINFO_H_
16
17 #include "AMDILRegisterInfo.h"
18
19 namespace llvm {
20
21 class AMDGPUTargetMachine;
22 class TargetInstrInfo;
23
// Abstract register-info base shared by the R600 and SI backends; concrete
// targets supply the reserved-register set and the AMDIL-to-ISA register
// class mapping.
struct AMDGPURegisterInfo : public AMDILRegisterInfo
{
  AMDGPUTargetMachine &TM;     // Owning target machine.
  const TargetInstrInfo &TII;  // Instruction info of the active target.

  AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);

  // Registers that must never be handed to the register allocator.
  virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;

  /// getISARegClass - rc is an AMDIL reg class. This function returns the
  /// ISA reg class that is equivalent to the given AMDIL reg class.
  virtual const TargetRegisterClass *
  getISARegClass(const TargetRegisterClass * rc) const = 0;
};
38
39 } // End namespace llvm
40
#endif // AMDGPUREGISTERINFO_H_
0 //===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Tablegen register definitions common to all hw codegen targets.
10 //
11 //===----------------------------------------------------------------------===//
12
let Namespace = "AMDGPU" in {
  // Sub-register indices — presumably the x/y/z/w channel selects of the
  // vector registers defined in R600RegisterInfo.td / SIRegisterInfo.td;
  // confirm against those files.
  def sel_x : SubRegIndex;
  def sel_y : SubRegIndex;
  def sel_z : SubRegIndex;
  def sel_w : SubRegIndex;
}
19
20 include "R600RegisterInfo.td"
21 include "SIRegisterInfo.td"
0 //=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 // This file declares the AMDGPU specific subclass of TargetSubtarget.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef _AMDGPUSUBTARGET_H_
14 #define _AMDGPUSUBTARGET_H_
15 #include "AMDILSubtarget.h"
16
17 namespace llvm {
18
19 class AMDGPUSubtarget : public AMDILSubtarget
20 {
21 InstrItineraryData InstrItins;
22
23 public:
24 AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
25 AMDILSubtarget(TT, CPU, FS)
26 {
27 InstrItins = getInstrItineraryForCPU(CPU);
28 }
29
30 const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
31 };
32
33 } // End namespace llvm
34
35 #endif // AMDGPUSUBTARGET_H_
0 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The AMDGPU target machine contains all of the hardware specific information
10 // needed to emit code for R600 and SI GPUs.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUTargetMachine.h"
15 #include "AMDGPU.h"
16 #include "R600ISelLowering.h"
17 #include "R600InstrInfo.h"
18 #include "SIISelLowering.h"
19 #include "SIInstrInfo.h"
20 #include "llvm/Analysis/Passes.h"
21 #include "llvm/Analysis/Verifier.h"
22 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/PassManager.h"
27 #include "llvm/Support/TargetRegistry.h"
28 #include "llvm/Support/raw_os_ostream.h"
29 #include "llvm/Transforms/IPO.h"
30 #include "llvm/Transforms/Scalar.h"
31
32 using namespace llvm;
33
34 extern "C" void LLVMInitializeAMDGPUTarget() {
35 // Register the target
36 RegisterTargetMachine X(TheAMDGPUTarget);
37 }
38
// Builds the target machine for either the R600 or SI backend.  Most
// components are constructed in the initializer list; InstrInfo/TLInfo are
// heap-allocated in the body because their concrete type depends on the
// device generation.
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
    StringRef CPU, StringRef FS,
  TargetOptions Options,
  Reloc::Model RM, CodeModel::Model CM,
  CodeGenOpt::Level OptLevel
)
:
  LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
  Subtarget(TT, CPU, FS),
  DataLayout(Subtarget.getDataLayout()),
  // GPU private stacks grow upward (StackGrowsUp).
  FrameLowering(TargetFrameLowering::StackGrowsUp,
      Subtarget.device()->getStackAlignment(), 0),
  IntrinsicInfo(this),
  InstrItins(&Subtarget.getInstrItineraryData()),
  mDump(false)

{
  // TLInfo uses InstrInfo so it must be initialized after.
  // HD6XXX and older are R600-class parts; anything newer is SI.
  if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
    InstrInfo = new R600InstrInfo(*this);
    TLInfo = new R600TargetLowering(*this);
  } else {
    InstrInfo = new SIInstrInfo(*this);
    TLInfo = new SITargetLowering(*this);
  }
}
65
66 AMDGPUTargetMachine::~AMDGPUTargetMachine()
67 {
68 }
69
70 bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
71 formatted_raw_ostream &Out,
72 CodeGenFileType FileType,
73 bool DisableVerify,
74 AnalysisID StartAfter,
75 AnalysisID StopAfter) {
76 // XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
77 // only using it to access addPassesToGenerateCode()
78 bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
79 DisableVerify);
80 assert(fail);
81
82 const AMDILSubtarget &STM = getSubtarget();
83 std::string gpu = STM.getDeviceName();
84 if (gpu == "SI") {
85 PM.add(createSICodeEmitterPass(Out));
86 } else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
87 PM.add(createR600CodeEmitterPass(Out));
88 } else {
89 abort();
90 return true;
91 }
92 PM.add(createGCInfoDeleter());
93
94 return false;
95 }
96
97 namespace {
98 class AMDGPUPassConfig : public TargetPassConfig {
99 public:
100 AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
101 : TargetPassConfig(TM, PM) {}
102
103 AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
104 return getTM();
105 }
106
107 virtual bool addPreISel();
108 virtual bool addInstSelector();
109 virtual bool addPreRegAlloc();
110 virtual bool addPostRegAlloc();
111 virtual bool addPreSched2();
112 virtual bool addPreEmitPass();
113 };
114 } // End of anonymous namespace
115
// Hook called by LLVMTargetMachine to create the codegen pass pipeline.
TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new AMDGPUPassConfig(this, PM);
}
119
120 bool
121 AMDGPUPassConfig::addPreISel()
122 {
123 const AMDILSubtarget &ST = TM->getSubtarget();
124 if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
125 addPass(createR600KernelParametersPass(
126 getAMDGPUTargetMachine().getTargetData()));
127 }
128 return false;
129 }
130
bool AMDGPUPassConfig::addInstSelector() {
  // Run the AMDIL peephole optimizer first, then the shared AMDIL DAG
  // instruction selector.
  addPass(createAMDILPeepholeOpt(*TM));
  addPass(createAMDILISelDag(getAMDGPUTargetMachine()));
  return false;
}
136
137 bool AMDGPUPassConfig::addPreRegAlloc() {
138 const AMDILSubtarget &ST = TM->getSubtarget();
139
140 if (ST.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
141 addPass(createSIAssignInterpRegsPass(*TM));
142 }
143 addPass(createAMDGPUConvertToISAPass(*TM));
144 return false;
145 }
146
// No extra passes are needed after register allocation.
bool AMDGPUPassConfig::addPostRegAlloc() {
  return false;
}
150
// No second scheduling pass is used by these targets.
bool AMDGPUPassConfig::addPreSched2() {
  return false;
}
154
bool AMDGPUPassConfig::addPreEmitPass() {
  // Prepare and structurize the CFG so only structured control-flow
  // constructs reach the emitters.
  addPass(createAMDILCFGPreparationPass(*TM));
  addPass(createAMDILCFGStructurizerPass(*TM));

  return false;
}
161
0 //===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The AMDGPU TargetMachine interface definition for hw codgen targets.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef AMDGPU_TARGET_MACHINE_H
14 #define AMDGPU_TARGET_MACHINE_H
15
16 #include "AMDGPUInstrInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "AMDILFrameLowering.h"
19 #include "AMDILIntrinsicInfo.h"
20 #include "R600ISelLowering.h"
21 #include "llvm/ADT/OwningPtr.h"
22 #include "llvm/Target/TargetData.h"
23
24 namespace llvm {
25
26 MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
27
28 class AMDGPUTargetMachine : public LLVMTargetMachine {
29
30 AMDGPUSubtarget Subtarget;
31 const TargetData DataLayout;
32 AMDILFrameLowering FrameLowering;
33 AMDILIntrinsicInfo IntrinsicInfo;
34 const AMDGPUInstrInfo * InstrInfo;
35 AMDGPUTargetLowering * TLInfo;
36 const InstrItineraryData* InstrItins;
37 bool mDump;
38
39 public:
40 AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
41 StringRef CPU,
42 TargetOptions Options,
43 Reloc::Model RM, CodeModel::Model CM,
44 CodeGenOpt::Level OL);
45 ~AMDGPUTargetMachine();
46 virtual const AMDILFrameLowering* getFrameLowering() const {
47 return &FrameLowering;
48 }
49 virtual const AMDILIntrinsicInfo* getIntrinsicInfo() const {
50 return &IntrinsicInfo;
51 }
52 virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
53 virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
54 virtual const AMDGPURegisterInfo *getRegisterInfo() const {
55 return &InstrInfo->getRegisterInfo();
56 }
57 virtual AMDGPUTargetLowering * getTargetLowering() const {
58 return TLInfo;
59 }
60 virtual const InstrItineraryData* getInstrItineraryData() const {
61 return InstrItins;
62 }
63 virtual const TargetData* getTargetData() const { return &DataLayout; }
64 virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
65 virtual bool addPassesToEmitFile(PassManagerBase &PM,
66 formatted_raw_ostream &Out,
67 CodeGenFileType FileType,
68 bool DisableVerify,
69 AnalysisID StartAfter = 0,
70 AnalysisID StopAfter = 0);
71 };
72
73 } // End namespace llvm
74
75 #endif // AMDGPU_TARGET_MACHINE_H
0 //===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Common utility functions used by hw codegen targets
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPUUtil.h"
14 #include "AMDGPURegisterInfo.h"
15 #include "AMDIL.h"
16 #include "llvm/CodeGen/MachineFunction.h"
17 #include "llvm/CodeGen/MachineInstrBuilder.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
19 #include "llvm/Target/TargetInstrInfo.h"
20 #include "llvm/Target/TargetMachine.h"
21 #include "llvm/Target/TargetRegisterInfo.h"
22
23 using namespace llvm;
24
25 // Some instructions act as place holders to emulate operations that the GPU
26 // hardware does automatically. This function can be used to check if
27 // an opcode falls into this category.
28 bool AMDGPU::isPlaceHolderOpcode(unsigned opcode)
29 {
30 switch (opcode) {
31 default: return false;
32 case AMDGPU::RETURN:
33 case AMDGPU::LOAD_INPUT:
34 case AMDGPU::LAST:
35 case AMDGPU::MASK_WRITE:
36 case AMDGPU::RESERVE_REG:
37 return true;
38 }
39 }
40
41 bool AMDGPU::isTransOp(unsigned opcode)
42 {
43 switch(opcode) {
44 default: return false;
45
46 case AMDGPU::COS_r600:
47 case AMDGPU::COS_eg:
48 case AMDGPU::MULLIT:
49 case AMDGPU::MUL_LIT_r600:
50 case AMDGPU::MUL_LIT_eg:
51 case AMDGPU::EXP_IEEE_r600:
52 case AMDGPU::EXP_IEEE_eg:
53 case AMDGPU::LOG_CLAMPED_r600:
54 case AMDGPU::LOG_IEEE_r600:
55 case AMDGPU::LOG_CLAMPED_eg:
56 case AMDGPU::LOG_IEEE_eg:
57 return true;
58 }
59 }
60
61 bool AMDGPU::isTexOp(unsigned opcode)
62 {
63 switch(opcode) {
64 default: return false;
65 case AMDGPU::TEX_LD:
66 case AMDGPU::TEX_GET_TEXTURE_RESINFO:
67 case AMDGPU::TEX_SAMPLE:
68 case AMDGPU::TEX_SAMPLE_C:
69 case AMDGPU::TEX_SAMPLE_L:
70 case AMDGPU::TEX_SAMPLE_C_L:
71 case AMDGPU::TEX_SAMPLE_LB:
72 case AMDGPU::TEX_SAMPLE_C_LB:
73 case AMDGPU::TEX_SAMPLE_G:
74 case AMDGPU::TEX_SAMPLE_C_G:
75 case AMDGPU::TEX_GET_GRADIENTS_H:
76 case AMDGPU::TEX_GET_GRADIENTS_V:
77 case AMDGPU::TEX_SET_GRADIENTS_H:
78 case AMDGPU::TEX_SET_GRADIENTS_V:
79 return true;
80 }
81 }
82
83 bool AMDGPU::isReductionOp(unsigned opcode)
84 {
85 switch(opcode) {
86 default: return false;
87 case AMDGPU::DOT4_r600:
88 case AMDGPU::DOT4_eg:
89 return true;
90 }
91 }
92
93 bool AMDGPU::isCubeOp(unsigned opcode)
94 {
95 switch(opcode) {
96 default: return false;
97 case AMDGPU::CUBE_r600:
98 case AMDGPU::CUBE_eg:
99 return true;
100 }
101 }
102
103
104 bool AMDGPU::isFCOp(unsigned opcode)
105 {
106 switch(opcode) {
107 default: return false;
108 case AMDGPU::BREAK_LOGICALZ_f32:
109 case AMDGPU::BREAK_LOGICALNZ_i32:
110 case AMDGPU::BREAK_LOGICALZ_i32:
111 case AMDGPU::BREAK_LOGICALNZ_f32:
112 case AMDGPU::CONTINUE_LOGICALNZ_f32:
113 case AMDGPU::IF_LOGICALNZ_i32:
114 case AMDGPU::IF_LOGICALZ_f32:
115 case AMDGPU::ELSE:
116 case AMDGPU::ENDIF:
117 case AMDGPU::ENDLOOP:
118 case AMDGPU::IF_LOGICALNZ_f32:
119 case AMDGPU::WHILELOOP:
120 return true;
121 }
122 }
123
124 void AMDGPU::utilAddLiveIn(MachineFunction * MF,
125 MachineRegisterInfo & MRI,
126 const TargetInstrInfo * TII,
127 unsigned physReg, unsigned virtReg)
128 {
129 if (!MRI.isLiveIn(physReg)) {
130 MRI.addLiveIn(physReg, virtReg);
131 MF->front().addLiveIn(physReg);
132 BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
133 TII->get(TargetOpcode::COPY), virtReg)
134 .addReg(physReg);
135 } else {
136 MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
137 }
138 }
0 //===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Declarations for utility functions common to all hw codegen targets.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef AMDGPU_UTIL_H
14 #define AMDGPU_UTIL_H
15
16 namespace llvm {
17
18 class MachineFunction;
19 class MachineRegisterInfo;
20 class TargetInstrInfo;
21
namespace AMDGPU {

// Opcode predicates; the opcode lists live in AMDGPUUtil.cpp.
bool isPlaceHolderOpcode(unsigned opcode);

bool isTransOp(unsigned opcode);
bool isTexOp(unsigned opcode);
bool isReductionOp(unsigned opcode);
bool isCubeOp(unsigned opcode);
bool isFCOp(unsigned opcode);

// XXX: Move these to AMDGPUInstrInfo.h
// Machine-operand flag bits — presumably clamp/negate/abs/write-mask source
// and destination modifiers; confirm against the R600 code emitter.
#define MO_FLAG_CLAMP (1 << 0)
#define MO_FLAG_NEG (1 << 1)
#define MO_FLAG_ABS (1 << 2)
#define MO_FLAG_MASK (1 << 3)

// Registers physReg as a function live-in copied into virtReg, or reuses an
// existing live-in virtual register.  See AMDGPUUtil.cpp.
void utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
    const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);

} // End namespace AMDGPU
42
43 } // End namespace llvm
44
45 #endif // AMDGPU_UTIL_H
0 //===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 // This file contains the entry points for global functions defined in the LLVM
10 // AMDIL back-end.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef AMDIL_H_
15 #define AMDIL_H_
16
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/Target/TargetMachine.h"
19
20 #define AMDIL_MAJOR_VERSION 2
21 #define AMDIL_MINOR_VERSION 0
22 #define AMDIL_REVISION_NUMBER 74
23 #define ARENA_SEGMENT_RESERVED_UAVS 12
24 #define DEFAULT_ARENA_UAV_ID 8
25 #define DEFAULT_RAW_UAV_ID 7
26 #define GLOBAL_RETURN_RAW_UAV_ID 11
27 #define HW_MAX_NUM_CB 8
28 #define MAX_NUM_UNIQUE_UAVS 8
29 #define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
30 #define OPENCL_MAX_READ_IMAGES 128
31 #define OPENCL_MAX_WRITE_IMAGES 8
32 #define OPENCL_MAX_SAMPLERS 16
33
34 // The next two values can never be zero, as zero is the ID that is
35 // used to assert against.
36 #define DEFAULT_LDS_ID 1
37 #define DEFAULT_GDS_ID 1
38 #define DEFAULT_SCRATCH_ID 1
39 #define DEFAULT_VEC_SLOTS 8
40
41 // SC->CAL version matchings.
42 #define CAL_VERSION_SC_150 1700
43 #define CAL_VERSION_SC_149 1700
44 #define CAL_VERSION_SC_148 1525
45 #define CAL_VERSION_SC_147 1525
46 #define CAL_VERSION_SC_146 1525
47 #define CAL_VERSION_SC_145 1451
48 #define CAL_VERSION_SC_144 1451
49 #define CAL_VERSION_SC_143 1441
50 #define CAL_VERSION_SC_142 1441
51 #define CAL_VERSION_SC_141 1420
52 #define CAL_VERSION_SC_140 1400
53 #define CAL_VERSION_SC_139 1387
54 #define CAL_VERSION_SC_138 1387
55 #define CAL_APPEND_BUFFER_SUPPORT 1340
56 #define CAL_VERSION_SC_137 1331
57 #define CAL_VERSION_SC_136 982
58 #define CAL_VERSION_SC_135 950
59 #define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
60
61 #define OCL_DEVICE_RV710 0x0001
62 #define OCL_DEVICE_RV730 0x0002
63 #define OCL_DEVICE_RV770 0x0004
64 #define OCL_DEVICE_CEDAR 0x0008
65 #define OCL_DEVICE_REDWOOD 0x0010
66 #define OCL_DEVICE_JUNIPER 0x0020
67 #define OCL_DEVICE_CYPRESS 0x0040
68 #define OCL_DEVICE_CAICOS 0x0080
69 #define OCL_DEVICE_TURKS 0x0100
70 #define OCL_DEVICE_BARTS 0x0200
71 #define OCL_DEVICE_CAYMAN 0x0400
72 #define OCL_DEVICE_ALL 0x3FFF
73
74 /// The number of function ID's that are reserved for
75 /// internal compiler usage.
76 const unsigned int RESERVED_FUNCS = 1024;
77
78 #define AMDIL_OPT_LEVEL_DECL
79 #define AMDIL_OPT_LEVEL_VAR
80 #define AMDIL_OPT_LEVEL_VAR_NO_COMMA
81
82 namespace llvm {
83 class AMDILInstrPrinter;
84 class FunctionPass;
85 class MCAsmInfo;
86 class raw_ostream;
87 class Target;
88 class TargetMachine;
89
90 /// Instruction selection passes.
91 FunctionPass*
92 createAMDILISelDag(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
93 FunctionPass*
94 createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
95
96 /// Pre emit passes.
97 FunctionPass*
98 createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
99 FunctionPass*
100 createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
101
102 extern Target TheAMDILTarget;
103 extern Target TheAMDGPUTarget;
104 } // end namespace llvm;
105
106 #define GET_REGINFO_ENUM
107 #include "AMDGPUGenRegisterInfo.inc"
108 #define GET_INSTRINFO_ENUM
109 #include "AMDGPUGenInstrInfo.inc"
110
111 /// Include device information enumerations
112 #include "AMDILDeviceInfo.h"
113
114 namespace llvm {
115 /// OpenCL uses address spaces to differentiate between
116 /// various memory regions on the hardware. On the CPU
117 /// all of the address spaces point to the same memory,
118 /// however on the GPU, each address space points to
/// a separate piece of memory that is unique from other
120 /// memory locations.
121 namespace AMDILAS {
// Numbered address spaces attached to pointer types — presumably shared
// with the frontend; confirm before renumbering.
enum AddressSpaces {
  PRIVATE_ADDRESS  = 0, // Address space for private memory.
  GLOBAL_ADDRESS   = 1, // Address space for global memory (RAT0, VTX0).
  CONSTANT_ADDRESS = 2, // Address space for constant memory.
  LOCAL_ADDRESS    = 3, // Address space for local memory.
  REGION_ADDRESS   = 4, // Address space for region memory.
  ADDRESS_NONE     = 5, // Address space for unknown memory.
  PARAM_D_ADDRESS  = 6, // Address space for direct addressable parameter memory (CONST0)
  PARAM_I_ADDRESS  = 7, // Address space for indirect addressable parameter memory (VTX1)
  USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
  LAST_ADDRESS     = 9  // Sentinel: number of address spaces.
};
134
135 // This union/struct combination is an easy way to read out the
136 // exact bits that are needed.
typedef union ResourceRec {
  struct {
#ifdef __BIG_ENDIAN__
    unsigned short isImage       : 1;  // Reserved for future use/llvm.
    unsigned short ResourceID    : 10; // Flag to specify the resource ID for
                                       // the op.
    unsigned short HardwareInst  : 1;  // Flag to specify that this instruction
                                       // is a hardware instruction.
    unsigned short ConflictPtr   : 1;  // Flag to specify that the pointer has a
                                       // conflict.
    unsigned short ByteStore     : 1;  // Flag to specify if the op is a byte
                                       // store op.
    unsigned short PointerPath   : 1;  // Flag to specify if the op is on the
                                       // pointer path.
    unsigned short CacheableRead : 1;  // Flag to specify if the read is
                                       // cacheable.
#else
    unsigned short CacheableRead : 1;  // Flag to specify if the read is
                                       // cacheable.
    unsigned short PointerPath   : 1;  // Flag to specify if the op is on the
                                       // pointer path.
    unsigned short ByteStore     : 1;  // Flag to specify if the op is byte
                                       // store op.
    unsigned short ConflictPtr   : 1;  // Flag to specify that the pointer has
                                       // a conflict.
    unsigned short HardwareInst  : 1;  // Flag to specify that this instruction
                                       // is a hardware instruction.
    unsigned short ResourceID    : 10; // Flag to specify the resource ID for
                                       // the op.
    unsigned short isImage       : 1;  // Reserved for future use.
#endif
  } bits;
  unsigned short u16all;               // Raw 16-bit view of all flag fields.
} InstrResEnc;
171
172 } // namespace AMDILAS
173
174 // Enums corresponding to AMDIL condition codes for IL. These
175 // values must be kept in sync with the ones in the .td file.
176 namespace AMDILCC {
enum CondCodes {
  // AMDIL specific condition codes. These correspond to the IL_CC_*
  // in AMDILInstrInfo.td and must be kept in the same order.
  IL_CC_D_EQ  =  0,   // DEQ instruction.
  IL_CC_D_GE  =  1,   // DGE instruction.
  IL_CC_D_LT  =  2,   // DLT instruction.
  IL_CC_D_NE  =  3,   // DNE instruction.
  IL_CC_F_EQ  =  4,   // EQ instruction.
  IL_CC_F_GE  =  5,   // GE instruction.
  IL_CC_F_LT  =  6,   // LT instruction.
  IL_CC_F_NE  =  7,   // NE instruction.
  IL_CC_I_EQ  =  8,   // IEQ instruction.
  IL_CC_I_GE  =  9,   // IGE instruction.
  IL_CC_I_LT  = 10,   // ILT instruction.
  IL_CC_I_NE  = 11,   // INE instruction.
  IL_CC_U_GE  = 12,   // UGE instruction.
  IL_CC_U_LT  = 13,   // ULT instruction.  (Was mislabeled "ULE".)
  // Pseudo IL Comparison instructions here.
  IL_CC_F_GT  = 14,   // GT instruction.
  IL_CC_U_GT  = 15,
  IL_CC_I_GT  = 16,
  IL_CC_D_GT  = 17,
  IL_CC_F_LE  = 18,   // LE instruction
  IL_CC_U_LE  = 19,
  IL_CC_I_LE  = 20,
  IL_CC_D_LE  = 21,
  // Unordered float comparisons (true if either operand is NaN).
  IL_CC_F_UNE = 22,
  IL_CC_F_UEQ = 23,
  IL_CC_F_ULT = 24,
  IL_CC_F_UGT = 25,
  IL_CC_F_ULE = 26,
  IL_CC_F_UGE = 27,
  // Ordered float comparisons (false if either operand is NaN).
  IL_CC_F_ONE = 28,
  IL_CC_F_OEQ = 29,
  IL_CC_F_OLT = 30,
  IL_CC_F_OGT = 31,
  IL_CC_F_OLE = 32,
  IL_CC_F_OGE = 33,
  // Unordered/ordered double comparisons.
  IL_CC_D_UNE = 34,
  IL_CC_D_UEQ = 35,
  IL_CC_D_ULT = 36,
  IL_CC_D_UGT = 37,
  IL_CC_D_ULE = 38,
  IL_CC_D_UGE = 39,
  IL_CC_D_ONE = 40,
  IL_CC_D_OEQ = 41,
  IL_CC_D_OLT = 42,
  IL_CC_D_OGT = 43,
  IL_CC_D_OLE = 44,
  IL_CC_D_OGE = 45,
  IL_CC_U_EQ  = 46,
  IL_CC_U_NE  = 47,
  IL_CC_F_O   = 48,
  IL_CC_D_O   = 49,
  IL_CC_F_UO  = 50,
  IL_CC_D_UO  = 51,
  // 64-bit signed (L) and unsigned (UL) integer comparisons.
  IL_CC_L_LE  = 52,
  IL_CC_L_GE  = 53,
  IL_CC_L_EQ  = 54,
  IL_CC_L_NE  = 55,
  IL_CC_L_LT  = 56,
  IL_CC_L_GT  = 57,
  IL_CC_UL_LE = 58,
  IL_CC_UL_GE = 59,
  IL_CC_UL_EQ = 60,
  IL_CC_UL_NE = 61,
  IL_CC_UL_LT = 62,
  IL_CC_UL_GT = 63,
  COND_ERROR  = 64
};
247
248 } // end namespace AMDILCC
249 } // end namespace llvm
250 #endif // AMDIL_H_
0 //===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8 #include "AMDIL7XXDevice.h"
9 #include "AMDILDevice.h"
10
11 using namespace llvm;
12
13 AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
14 {
15 setCaps();
16 std::string name = mSTM->getDeviceName();
17 if (name == "rv710") {
18 mDeviceFlag = OCL_DEVICE_RV710;
19 } else if (name == "rv730") {
20 mDeviceFlag = OCL_DEVICE_RV730;
21 } else {
22 mDeviceFlag = OCL_DEVICE_RV770;
23 }
24 }
25
// Out-of-line virtual destructor; no resources to release.
AMDIL7XXDevice::~AMDIL7XXDevice()
{
}
29
void AMDIL7XXDevice::setCaps()
{
  // Base 7XX parts emulate local memory in software.
  mSWBits.set(AMDILDeviceInfo::LocalMem);
}
34
35 size_t AMDIL7XXDevice::getMaxLDSSize() const
36 {
37 if (usesHardware(AMDILDeviceInfo::LocalMem)) {
38 return MAX_LDS_SIZE_700;
39 }
40 return 0;
41 }
42
// Base 7XX devices run half-size wavefronts.
size_t AMDIL7XXDevice::getWavefrontSize() const
{
  return AMDILDevice::HalfWavefrontSize;
}
47
// The whole 7XX family reports the HD4XXX generation.
uint32_t AMDIL7XXDevice::getGeneration() const
{
  return AMDILDeviceInfo::HD4XXX;
}
52
53 uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
54 {
55 switch (DeviceID) {
56 default:
57 assert(0 && "ID type passed in is unknown!");
58 break;
59 case GLOBAL_ID:
60 case CONSTANT_ID:
61 case RAW_UAV_ID:
62 case ARENA_UAV_ID:
63 break;
64 case LDS_ID:
65 if (usesHardware(AMDILDeviceInfo::LocalMem)) {
66 return DEFAULT_LDS_ID;
67 }
68 break;
69 case SCRATCH_ID:
70 if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
71 return DEFAULT_SCRATCH_ID;
72 }
73 break;
74 case GDS_ID:
75 assert(0 && "GDS UAV ID is not supported on this chip");
76 if (usesHardware(AMDILDeviceInfo::RegionMem)) {
77 return DEFAULT_GDS_ID;
78 }
79 break;
80 };
81
82 return 0;
83 }
84
// 7XX parts expose a single UAV.
uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
{
  return 1;
}
89
// RV770: inherits 7XX setup (which also runs the 7XX setCaps), then applies
// the 770-specific capability overrides.
AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
{
  setCaps();
}
94
// Out-of-line virtual destructor; no resources to release.
AMDIL770Device::~AMDIL770Device()
{
}
98
void AMDIL770Device::setCaps()
{
  // Double-precision ops are opt-in: enabled in hardware (with FMA done in
  // software) only when the subtarget overrides DoubleOps on.
  if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
    mSWBits.set(AMDILDeviceInfo::FMA);
    mHWBits.set(AMDILDeviceInfo::DoubleOps);
  }
  mSWBits.set(AMDILDeviceInfo::BarrierDetect);
  // 64-bit integer ops move from hardware to software emulation here.
  mHWBits.reset(AMDILDeviceInfo::LongOps);
  mSWBits.set(AMDILDeviceInfo::LongOps);
  mSWBits.set(AMDILDeviceInfo::LocalMem);
}
110
// RV770 runs full-size wavefronts, unlike the base 7XX class.
size_t AMDIL770Device::getWavefrontSize() const
{
  return AMDILDevice::WavefrontSize;
}
115
// RV710 uses the stock 7XX setup; only the wavefront size differs.
AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
{
}
119
// Out-of-line virtual destructor; no resources to release.
AMDIL710Device::~AMDIL710Device()
{
}
123
// RV710 runs quarter-size wavefronts.
size_t AMDIL710Device::getWavefrontSize() const
{
  return AMDILDevice::QuarterWavefrontSize;
}
0 //==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 // Interface for the subtarget data classes.
10 //
11 //===----------------------------------------------------------------------===//
12 // This file will define the interface that each generation needs to
13 // implement in order to correctly answer queries on the capabilities of the
14 // specific hardware.
15 //===----------------------------------------------------------------------===//
16 #ifndef _AMDIL7XXDEVICEIMPL_H_
17 #define _AMDIL7XXDEVICEIMPL_H_
18 #include "AMDILDevice.h"
19 #include "AMDILSubtarget.h"
20
21 namespace llvm {
22 class AMDILSubtarget;
23
24 //===----------------------------------------------------------------------===//
25 // 7XX generation of devices and their respective sub classes
26 //===----------------------------------------------------------------------===//
27
28 // The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
29 // devices are derived from this class. The AMDIL7XX device will only
30 // support the minimal features that are required to be considered OpenCL 1.0
31 // compliant and nothing more.
// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
// devices are derived from this class. The AMDIL7XX device will only
// support the minimal features that are required to be considered OpenCL 1.0
// compliant and nothing more.
class AMDIL7XXDevice : public AMDILDevice {
public:
  AMDIL7XXDevice(AMDILSubtarget *ST);
  virtual ~AMDIL7XXDevice();
  virtual size_t getMaxLDSSize() const;    // 0 unless LDS is in hardware.
  virtual size_t getWavefrontSize() const; // Half wavefront on base 7XX.
  virtual uint32_t getGeneration() const;  // Always HD4XXX for this family.
  virtual uint32_t getResourceID(uint32_t DeviceID) const;
  virtual uint32_t getMaxNumUAVs() const;  // 7XX exposes a single UAV.

protected:
  virtual void setCaps();
}; // AMDIL7XXDevice
45
46 // The AMDIL770Device class represents the RV770 chip and it's
47 // derivative cards. The difference between this device and the base
48 // class is this device device adds support for double precision
49 // and has a larger wavefront size.
// The AMDIL770Device class represents the RV770 chip and its derivative
// cards. It differs from the base class by adding (optional) double
// precision support and a full-size wavefront.
class AMDIL770Device : public AMDIL7XXDevice {
public:
  AMDIL770Device(AMDILSubtarget *ST);
  virtual ~AMDIL770Device();
  virtual size_t getWavefrontSize() const; // Full wavefront.
private:
  virtual void setCaps();
}; // AMDIL770Device
58
59 // The AMDIL710Device class derives from the 7XX base class, but this
60 // class is a smaller derivative, so we need to overload some of the
61 // functions in order to correctly specify this information.
// The AMDIL710Device class derives from the 7XX base class, but this
// class is a smaller derivative, so we need to overload some of the
// functions in order to correctly specify this information.
class AMDIL710Device : public AMDIL7XXDevice {
public:
  AMDIL710Device(AMDILSubtarget *ST);
  virtual ~AMDIL710Device();
  virtual size_t getWavefrontSize() const; // Quarter wavefront.
}; // AMDIL710Device
68
69 } // namespace llvm
#endif // _AMDIL7XXDEVICEIMPL_H_
0 //===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides templates algorithms that extend the STL algorithms, but
10 // are useful for the AMDIL backend
11 //
12 //===----------------------------------------------------------------------===//
13
// A template function that loops through the iterators and passes the second
// argument along with each element to the function.  Based on the for_each
// STL function, but allows a reference to the second argument.  The template
// parameter list was lost in transcription and is restored here.
template <class InputIterator, class Function, class Arg>
Function binaryForEach(InputIterator First, InputIterator Last, Function F,
                       Arg &Second)
{
  for ( ; First != Last; ++First ) {
    F(*First, Second);
  }
  return F;
}
29
// Like binaryForEach, but if F returns true the current iterator is assumed
// invalidated: it steps back before the loop's increment moves forward.
// The template parameter list was lost in transcription and is restored.
template <class InputIterator, class Function, class Arg>
Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
                           Arg &Second)
{
  for ( ; First != Last; ++First ) {
    if (F(*First, Second)) {
      --First;
    }
  }
  return F;
}
41
42 // A template function that has two levels of looping before calling the
43 // function with the passed in argument. See binaryForEach for further
44 // explanation
// Two-level version of binaryForEach: walks the outer range and applies
// binaryForEach over each inner container's [begin(), end()) range, passing
// the same Second reference throughout.  Returns the functor.
// NOTE(review): the template parameter list was stripped by extraction and
// has been restored here.
template <class InputIterator, class Function, typename Arg>
Function binaryNestedForEach(InputIterator First, InputIterator Last,
                             Function F, Arg &Second)
{
  for ( ; First != Last; ++First) {
    binaryForEach(First->begin(), First->end(), F, Second);
  }
  return F;
}
// Two-level version of safeBinaryForEach: walks the outer range and applies
// safeBinaryForEach over each inner container, so the functor may invalidate
// the current inner iterator (see safeBinaryForEach).  Returns the functor.
// NOTE(review): the template parameter list was stripped by extraction and
// has been restored here.
template <class InputIterator, class Function, typename Arg>
Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
                                 Function F, Arg &Second)
{
  for ( ; First != Last; ++First) {
    safeBinaryForEach(First->begin(), First->end(), F, Second);
  }
  return F;
}
63
64 // Unlike the STL, a pointer to the iterator itself is passed in with the 'safe'
65 // versions of these functions This allows the function to handle situations
66 // such as invalidated iterators
// Like std::for_each, but passes a pointer to the iterator itself so the
// functor can handle (or cause) iterator invalidation at the current
// position.  Returns the functor.
// NOTE(review): the template parameter list was stripped by extraction and
// has been restored here.
template <class InputIterator, class Function>
Function safeForEach(InputIterator First, InputIterator Last, Function F)
{
  for ( ; First != Last; ++First ) {
    F(&First);
  }
  return F;
}
74
75 // A template function that has two levels of looping before calling the
76 // function with a pointer to the current iterator. See binaryForEach for
77 // further explanation
// Two-level 'safe' iteration: for every inner container in [First, Last),
// walk it passing a pointer to the inner iterator to F.  F returns true when
// it has already consumed/advanced the position itself (e.g. after an erase),
// and false when the loop should advance normally.  The S parameter is unused
// at runtime; it only fixes the SecondIterator type for callers.
// Returns the functor.
// NOTE(review): the template parameter list was stripped by extraction and
// has been restored here.
template <class InputIterator, class SecondIterator, class Function>
Function safeNestedForEach(InputIterator First, InputIterator Last,
                           SecondIterator S, Function F)
{
  for ( ; First != Last; ++First) {
    SecondIterator sf, sl;
    for (sf = First->begin(), sl = First->end();
         sf != sl; ) {
      if (!F(&sf)) {
        ++sf; // F did not consume the position; step forward ourselves.
      }
    }
  }
  return F;
}
0 //===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 // Target-independent interfaces which we are implementing
9 //===----------------------------------------------------------------------===//
10
11 include "llvm/Target/Target.td"
12
// Dummy Instruction itineraries for pseudo instructions
// ALU_NULL/NullALU give pseudo instructions a placeholder functional unit
// and itinerary class so the scheduling models accept them.
def ALU_NULL : FuncUnit;
def NullALU : InstrItinClass;
16
17 //===----------------------------------------------------------------------===//
18 // AMDIL Subtarget features.
19 //===----------------------------------------------------------------------===//
// Subtarget features.  Most features set an entry in the subtarget's
// CapsOverride table (indexed by AMDILDeviceInfo capability id); the
// pointer-mode and DumpCode features instead toggle plain member variables
// (mIs64bit, mIs32on64bit, mDumpCode).
def FeatureFP64 : SubtargetFeature<"fp64",
        "CapsOverride[AMDILDeviceInfo::DoubleOps]",
        "true",
        "Enable 64bit double precision operations">;
def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
        "CapsOverride[AMDILDeviceInfo::ByteStores]",
        "true",
        "Enable byte addressable stores">;
def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
        "CapsOverride[AMDILDeviceInfo::BarrierDetect]",
        "true",
        "Enable duplicate barrier detection(HD5XXX or later).">;
def FeatureImages : SubtargetFeature<"images",
        "CapsOverride[AMDILDeviceInfo::Images]",
        "true",
        "Enable image functions">;
def FeatureMultiUAV : SubtargetFeature<"multi_uav",
        "CapsOverride[AMDILDeviceInfo::MultiUAV]",
        "true",
        "Generate multiple UAV code(HD5XXX family or later)">;
def FeatureMacroDB : SubtargetFeature<"macrodb",
        "CapsOverride[AMDILDeviceInfo::MacroDB]",
        "true",
        "Use internal macrodb, instead of macrodb in driver">;
def FeatureNoAlias : SubtargetFeature<"noalias",
        "CapsOverride[AMDILDeviceInfo::NoAlias]",
        "true",
        "assert that all kernel argument pointers are not aliased">;
def FeatureNoInline : SubtargetFeature<"no-inline",
        "CapsOverride[AMDILDeviceInfo::NoInline]",
        "true",
        "specify whether to not inline functions">;

// Pointer-size/addressing mode selection (member variables, not caps).
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
        "mIs64bit",
        "false",
        "Specify if 64bit addressing should be used.">;

def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
        "mIs32on64bit",
        "false",
        "Specify if 64bit sized pointers with 32bit addressing should be used.">;
def FeatureDebug : SubtargetFeature<"debug",
        "CapsOverride[AMDILDeviceInfo::Debug]",
        "true",
        "Debug mode is enabled, so disable hardware accelerated address spaces.">;
def FeatureDumpCode : SubtargetFeature <"DumpCode",
        "mDumpCode",
        "true",
        "Dump MachineInstrs in the CodeEmitter">;
70
71
72 //===----------------------------------------------------------------------===//
73 // Register File, Calling Conv, Instruction Descriptions
74 //===----------------------------------------------------------------------===//
75
76
77 include "AMDILRegisterInfo.td"
78 include "AMDILCallingConv.td"
79 include "AMDILInstrInfo.td"
80
// Instantiate the generic InstrInfo record from the included instruction defs.
def AMDILInstrInfo : InstrInfo {}
82
83 //===----------------------------------------------------------------------===//
84 // AMDIL processors supported.
85 //===----------------------------------------------------------------------===//
86 //include "Processors.td"
87
88 //===----------------------------------------------------------------------===//
89 // Declare the target which we are implementing
90 //===----------------------------------------------------------------------===//
// Assembly writer: the generated printer class will be named "AsmPrinter".
def AMDILAsmWriter : AsmWriter {
    string AsmWriterClassName = "AsmPrinter";
    int Variant = 0;
}
95
// Assembly parser configuration for the AMDIL target.
def AMDILAsmParser : AsmParser {
    string AsmParserClassName = "AsmParser";
    int Variant = 0;

    // ';' starts a line comment in the parsed assembly.
    string CommentDelimiter = ";";

    // Registers are written with an 'r' prefix in assembly.
    string RegisterPrefix = "r";

}
105
106
// Top-level target definition tying together the instruction set and the
// assembly writer/parser records declared above.
def AMDIL : Target {
  // Pull in Instruction Info:
  let InstructionSet = AMDILInstrInfo;
  let AssemblyWriters = [AMDILAsmWriter];
  let AssemblyParsers = [AMDILAsmParser];
}
0 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8
9 #define DEBUGME 0
10 #define DEBUG_TYPE "structcfg"
11
12 #include "AMDIL.h"
13 #include "AMDILInstrInfo.h"
14 #include "AMDILRegisterInfo.h"
15 #include "AMDILUtilityFunctions.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/DominatorInternals.h"
20 #include "llvm/Analysis/Dominators.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32
33 #define FirstNonDebugInstr(A) A->begin()
34 using namespace llvm;
35
36 // TODO: move-begin.
37
38 //===----------------------------------------------------------------------===//
39 //
40 // Statistics for CFGStructurizer.
41 //
42 //===----------------------------------------------------------------------===//
43
// Pass-wide counters (visible via -stats): how often each CFG pattern was
// reduced and how much block/instruction cloning the structurizer did.
STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
    "matched");
STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
    "matched");
STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
    "pattern matched");
STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
    "pattern matched");
STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
    "matched");
STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
56
57 //===----------------------------------------------------------------------===//
58 //
59 // Miscellaneous utility for CFGStructurizer.
60 //
61 //===----------------------------------------------------------------------===//
62 namespace llvmCFGStruct
63 {
// Debug-print helpers; all of them are no-ops unless DEBUGME is set.
#define SHOWNEWINSTR(i) \
if (DEBUGME) errs() << "New instr: " << *i << "\n"

// Print a one-line summary (block number and size) of basic block 'b'.
#define SHOWNEWBLK(b, msg) \
if (DEBUGME) { \
  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
  errs() << "\n"; \
}

// Like SHOWNEWBLK but also dumps the full block contents (if b is non-null).
#define SHOWBLK_DETAIL(b, msg) \
if (DEBUGME) { \
  if (b) { \
  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
  b->print(errs()); \
  errs() << "\n"; \
  } \
}

// Sentinels: SCC number for blocks never reached by the SCC ordering, and
// the "no register" value.
#define INVALIDSCCNUM -1
#define INVALIDREGNUM 0
84
85 template
86 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
87 for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
88 iterEnd = LoopInfo.end();
89 iter != iterEnd; ++iter) {
90 (*iter)->print(OS, 0);
91 }
92 }
93
94 template
95 void ReverseVector(SmallVector &Src) {
96 size_t sz = Src.size();
97 for (size_t i = 0; i < sz/2; ++i) {
98 NodeT *t = Src[i];
99 Src[i] = Src[sz - i - 1];
100 Src[sz - i - 1] = t;
101 }
102 }
103
104 } //end namespace llvmCFGStruct
105
106
107 //===----------------------------------------------------------------------===//
108 //
109 // MachinePostDominatorTree
110 //
111 //===----------------------------------------------------------------------===//
112
113 namespace llvm {
114
115 /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
116 /// to compute the a post-dominator tree.
117 ///
118 struct MachinePostDominatorTree : public MachineFunctionPass {
119 static char ID; // Pass identification, replacement for typeid
120 DominatorTreeBase *DT;
121 MachinePostDominatorTree() : MachineFunctionPass(ID)
122 {
123 DT = new DominatorTreeBase(true); //true indicate
124 // postdominator
125 }
126
127 ~MachinePostDominatorTree();
128
129 virtual bool runOnMachineFunction(MachineFunction &MF);
130
131 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
132 AU.setPreservesAll();
133 MachineFunctionPass::getAnalysisUsage(AU);
134 }
135
136 inline const std::vector &getRoots() const {
137 return DT->getRoots();
138 }
139
140 inline MachineDomTreeNode *getRootNode() const {
141 return DT->getRootNode();
142 }
143
144 inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
145 return DT->getNode(BB);
146 }
147
148 inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
149 return DT->getNode(BB);
150 }
151
152 inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
153 return DT->dominates(A, B);
154 }
155
156 inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
157 return DT->dominates(A, B);
158 }
159
160 inline bool
161 properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
162 return DT->properlyDominates(A, B);
163 }
164
165 inline bool
166 properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
167 return DT->properlyDominates(A, B);
168 }
169
170 inline MachineBasicBlock *
171 findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
172 return DT->findNearestCommonDominator(A, B);
173 }
174
175 virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
176 DT->print(OS);
177 }
178 };
179 } //end of namespace llvm
180
181 char MachinePostDominatorTree::ID = 0;
182 static RegisterPass
183 machinePostDominatorTreePass("machinepostdomtree",
184 "MachinePostDominator Tree Construction",
185 true, true);
186
187 //const PassInfo *const llvm::MachinePostDominatorsID
188 //= &machinePostDominatorTreePass;
189
// Recompute the post-dominator tree for F.  Always returns false: this is
// an analysis and never modifies the function.
bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
  DT->recalculate(F);
  //DEBUG(DT->dump());
  return false;
}
195
// Release the heap-allocated dominator tree created in the constructor.
MachinePostDominatorTree::~MachinePostDominatorTree() {
  delete DT;
}
199
200 //===----------------------------------------------------------------------===//
201 //
202 // supporting data structure for CFGStructurizer
203 //
204 //===----------------------------------------------------------------------===//
205
206 namespace llvmCFGStruct
207 {
// Trait class specialized (elsewhere) per pass type; adapts target-specific
// branch/instruction handling for the generic CFGStructurizer below.
// NOTE(review): the template parameter list was stripped by extraction and
// has been restored here.
template <class PassT>
struct CFGStructTraits {
};
211
212 template
213 class BlockInformation {
214 public:
215 bool isRetired;
216 int sccNum;
217 //SmallVector succInstr;
218 //Instructions defining the corresponding successor.
219 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
220 };
221
// Per-loop bookkeeping: the landing block that all exits of the loop are
// funneled into, plus the sets of condition registers that must be
// initialized/tested to emulate break/continue with structured control flow.
// NOTE(review): the template/argument lists were stripped by extraction;
// restored with std::set<int> (register numbers; matches the RegiT = int
// typedef in CFGStructurizer) -- confirm against the original source.
template <class BlockT>
class LandInformation {
public:
  BlockT *landBlk; // loop landing block; NULL until recorded
  std::set<int> breakInitRegs;     //Registers that need to "reg = 0", before
                                   //WHILELOOP(thisloop) init before entering
                                   //thisloop.
  std::set<int> contInitRegs;      //Registers that need to "reg = 0", after
                                   //WHILELOOP(thisloop) init after entering
                                   //thisloop.
  std::set<int> endbranchInitRegs; //Init before entering this loop, at loop
                                   //land block, branch cond on this reg.
  std::set<int> breakOnRegs;       //registers that need to "if (reg) break
                                   //endif" after ENDLOOP(thisloop) break
                                   //outerLoopOf(thisLoop).
  std::set<int> contOnRegs;        //registers that need to "if (reg) continue
                                   //endif" after ENDLOOP(thisloop) continue on
                                   //outerLoopOf(thisLoop).
  LandInformation() : landBlk(NULL) {}
};
242
243 } //end of namespace llvmCFGStruct
244
245 //===----------------------------------------------------------------------===//
246 //
247 // CFGStructurizer
248 //
249 //===----------------------------------------------------------------------===//
250
251 namespace llvmCFGStruct
252 {
253 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
254 template
255 class CFGStructurizer
256 {
257 public:
258 typedef enum {
259 Not_SinglePath = 0,
260 SinglePath_InPath = 1,
261 SinglePath_NotInPath = 2
262 } PathToKind;
263
264 public:
265 typedef typename PassT::InstructionType InstrT;
266 typedef typename PassT::FunctionType FuncT;
267 typedef typename PassT::DominatortreeType DomTreeT;
268 typedef typename PassT::PostDominatortreeType PostDomTreeT;
269 typedef typename PassT::DomTreeNodeType DomTreeNodeT;
270 typedef typename PassT::LoopinfoType LoopInfoT;
271
272 typedef GraphTraits FuncGTraits;
273 //typedef FuncGTraits::nodes_iterator BlockIterator;
274 typedef typename FuncT::iterator BlockIterator;
275
276 typedef typename FuncGTraits::NodeType BlockT;
277 typedef GraphTraits BlockGTraits;
278 typedef GraphTraits > InvBlockGTraits;
279 //typedef BlockGTraits::succ_iterator InstructionIterator;
280 typedef typename BlockT::iterator InstrIterator;
281
282 typedef CFGStructTraits CFGTraits;
283 typedef BlockInformation BlockInfo;
284 typedef std::map BlockInfoMap;
285
286 typedef int RegiT;
287 typedef typename PassT::LoopType LoopT;
288 typedef LandInformation LoopLandInfo;
289 typedef std::map LoopLandInfoMap;
290 //landing info for loop break
291 typedef SmallVector BlockTSmallerVector;
292
293 public:
294 CFGStructurizer();
295 ~CFGStructurizer();
296
297 /// Perform the CFG structurization
298 bool run(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri);
299
300 /// Perform the CFG preparation
301 bool prepare(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri);
302
303 private:
304 void orderBlocks();
305 void printOrderedBlocks(llvm::raw_ostream &OS);
306 int patternMatch(BlockT *CurBlock);
307 int patternMatchGroup(BlockT *CurBlock);
308
309 int serialPatternMatch(BlockT *CurBlock);
310 int ifPatternMatch(BlockT *CurBlock);
311 int switchPatternMatch(BlockT *CurBlock);
312 int loopendPatternMatch(BlockT *CurBlock);
313 int loopPatternMatch(BlockT *CurBlock);
314
315 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
316 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
317 //int loopWithoutBreak(BlockT *);
318
319 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
320 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
321 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
322 BlockT *ContBlock, LoopT *contLoop);
323 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
324 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
325 BlockT *FalseBlock);
326 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
327 BlockT *FalseBlock);
328 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
329 BlockT *FalseBlock, BlockT **LandBlockPtr);
330 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
331 BlockT *FalseBlock, BlockT *LandBlock,
332 bool Detail = false);
333 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
334 bool AllowSideEntry = true);
335 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
336 bool AllowSideEntry = true);
337 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
338 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
339
340 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
341 BlockT *TrueBlock, BlockT *FalseBlock,
342 BlockT *LandBlock);
343 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
344 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
345 BlockT *ExitLandBlock, RegiT SetReg);
346 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
347 RegiT SetReg);
348 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
349 std::set &ExitBlockSet,
350 BlockT *ExitLandBlk);
351 BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
352 BlockTSmallerVector &ExitingBlocks,
353 BlockTSmallerVector &ExitBlocks);
354 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
355 void removeUnconditionalBranch(BlockT *SrcBlock);
356 void removeRedundantConditionalBranch(BlockT *SrcBlock);
357 void addDummyExitBlock(SmallVector &RetBlocks);
358
359 void removeSuccessor(BlockT *SrcBlock);
360 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
361 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
362
363 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
364 InstrIterator InsertPos);
365
366 void recordSccnum(BlockT *SrcBlock, int SCCNum);
367 int getSCCNum(BlockT *srcBlk);
368
369 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
370 bool isRetiredBlock(BlockT *SrcBlock);
371 bool isActiveLoophead(BlockT *CurBlock);
372 bool needMigrateBlock(BlockT *Block);
373
374 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
375 BlockTSmallerVector &exitBlocks,
376 std::set &ExitBlockSet);
377 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
378 BlockT *getLoopLandBlock(LoopT *LoopRep);
379 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
380
381 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
382 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
383 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
384 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
385 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
386
387 bool hasBackEdge(BlockT *curBlock);
388 unsigned getLoopDepth (LoopT *LoopRep);
389 int countActiveBlock(
390 typename SmallVector::const_iterator IterStart,
391 typename SmallVector::const_iterator IterEnd);
392 BlockT *findNearestCommonPostDom(std::set&);
393 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
394
395 private:
396 DomTreeT *domTree;
397 PostDomTreeT *postDomTree;
398 LoopInfoT *loopInfo;
399 PassT *passRep;
400 FuncT *funcRep;
401
402 BlockInfoMap blockInfoMap;
403 LoopLandInfoMap loopLandInfoMap;
404 SmallVector orderedBlks;
405 const AMDILRegisterInfo *TRI;
406
407 }; //template class CFGStructurizer
408
409 template CFGStructurizer::CFGStructurizer()
410 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
411 }
412
413 template CFGStructurizer::~CFGStructurizer() {
414 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
415 E = blockInfoMap.end(); I != E; ++I) {
416 delete I->second;
417 }
418 }
419
420 template
421 bool CFGStructurizer::prepare(FuncT &func, PassT &pass,
422 const AMDILRegisterInfo * tri) {
423 passRep = &pass;
424 funcRep = &func;
425 TRI = tri;
426
427 bool changed = false;
428 //func.RenumberBlocks();
429
430 //to do, if not reducible flow graph, make it so ???
431
432 if (DEBUGME) {
433 errs() << "AMDILCFGStructurizer::prepare\n";
434 //func.viewCFG();
435 //func.viewCFGOnly();
436 //func.dump();
437 }
438
439 //FIXME: gcc complains on this.
440 //domTree = &pass.getAnalysis();
441 //domTree = CFGTraits::getDominatorTree(pass);
442 //if (DEBUGME) {
443 // domTree->print(errs());
444 //}
445
446 //FIXME: gcc complains on this.
447 //domTree = &pass.getAnalysis();
448 //postDomTree = CFGTraits::getPostDominatorTree(pass);
449 //if (DEBUGME) {
450 // postDomTree->print(errs());
451 //}
452
453 //FIXME: gcc complains on this.
454 //loopInfo = &pass.getAnalysis();
455 loopInfo = CFGTraits::getLoopInfo(pass);
456 if (DEBUGME) {
457 errs() << "LoopInfo:\n";
458 PrintLoopinfo(*loopInfo, errs());
459 }
460
461 orderBlocks();
462 if (DEBUGME) {
463 errs() << "Ordered blocks:\n";
464 printOrderedBlocks(errs());
465 }
466
467 SmallVector retBlks;
468
469 for (typename LoopInfoT::iterator iter = loopInfo->begin(),
470 iterEnd = loopInfo->end();
471 iter != iterEnd; ++iter) {
472 LoopT* loopRep = (*iter);
473 BlockTSmallerVector exitingBlks;
474 loopRep->getExitingBlocks(exitingBlks);
475
476 if (exitingBlks.size() == 0) {
477 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
478 if (dummyExitBlk != NULL)
479 retBlks.push_back(dummyExitBlk);
480 }
481 }
482
483 // Remove unconditional branch instr.
484 // Add dummy exit block iff there are multiple returns.
485
486 for (typename SmallVector::const_iterator
487 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
488 iterBlk != iterEndBlk;
489 ++iterBlk) {
490 BlockT *curBlk = *iterBlk;
491 removeUnconditionalBranch(curBlk);
492 removeRedundantConditionalBranch(curBlk);
493 if (CFGTraits::isReturnBlock(curBlk)) {
494 retBlks.push_back(curBlk);
495 }
496 assert(curBlk->succ_size() <= 2);
497 //assert(curBlk->size() > 0);
498 //removeEmptyBlock(curBlk) ??
499 } //for
500
501 if (retBlks.size() >= 2) {
502 addDummyExitBlock(retBlks);
503 changed = true;
504 }
505
506 return changed;
507 } //CFGStructurizer::prepare
508
509 template
510 bool CFGStructurizer::run(FuncT &func, PassT &pass,
511 const AMDILRegisterInfo * tri) {
512 passRep = &pass;
513 funcRep = &func;
514 TRI = tri;
515
516 //func.RenumberBlocks();
517
518 //Assume reducible CFG...
519 if (DEBUGME) {
520 errs() << "AMDILCFGStructurizer::run\n";
521 //errs() << func.getFunction()->getNameStr() << "\n";
522 func.viewCFG();
523 //func.viewCFGOnly();
524 //func.dump();
525 }
526
527 #if 1
528 //FIXME: gcc complains on this.
529 //domTree = &pass.getAnalysis();
530 domTree = CFGTraits::getDominatorTree(pass);
531 if (DEBUGME) {
532 domTree->print(errs(), (const llvm::Module*)0);
533 }
534 #endif
535
536 //FIXME: gcc complains on this.
537 //domTree = &pass.getAnalysis();
538 postDomTree = CFGTraits::getPostDominatorTree(pass);
539 if (DEBUGME) {
540 postDomTree->print(errs());
541 }
542
543 //FIXME: gcc complains on this.
544 //loopInfo = &pass.getAnalysis();
545 loopInfo = CFGTraits::getLoopInfo(pass);
546 if (DEBUGME) {
547 errs() << "LoopInfo:\n";
548 PrintLoopinfo(*loopInfo, errs());
549 }
550
551 orderBlocks();
552 //#define STRESSTEST
553 #ifdef STRESSTEST
554 //Use the worse block ordering to test the algorithm.
555 ReverseVector(orderedBlks);
556 #endif
557
558 if (DEBUGME) {
559 errs() << "Ordered blocks:\n";
560 printOrderedBlocks(errs());
561 }
562 int numIter = 0;
563 bool finish = false;
564 BlockT *curBlk;
565 bool makeProgress = false;
566 int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
567 orderedBlks.end());
568
569 do {
570 ++numIter;
571 if (DEBUGME) {
572 errs() << "numIter = " << numIter
573 << ", numRemaintedBlk = " << numRemainedBlk << "\n";
574 }
575
576 typename SmallVector::const_iterator
577 iterBlk = orderedBlks.begin();
578 typename SmallVector::const_iterator
579 iterBlkEnd = orderedBlks.end();
580
581 typename SmallVector::const_iterator
582 sccBeginIter = iterBlk;
583 BlockT *sccBeginBlk = NULL;
584 int sccNumBlk = 0; // The number of active blocks, init to a
585 // maximum possible number.
586 int sccNumIter; // Number of iteration in this SCC.
587
588 while (iterBlk != iterBlkEnd) {
589 curBlk = *iterBlk;
590
591 if (sccBeginBlk == NULL) {
592 sccBeginIter = iterBlk;
593 sccBeginBlk = curBlk;
594 sccNumIter = 0;
595 sccNumBlk = numRemainedBlk; // Init to maximum possible number.
596 if (DEBUGME) {
597 errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
598 errs() << "\n";
599 }
600 }
601
602 if (!isRetiredBlock(curBlk)) {
603 patternMatch(curBlk);
604 }
605
606 ++iterBlk;
607
608 bool contNextScc = true;
609 if (iterBlk == iterBlkEnd
610 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
611 // Just finish one scc.
612 ++sccNumIter;
613 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
614 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
615 if (DEBUGME) {
616 errs() << "Can't reduce SCC " << getSCCNum(curBlk)
617 << ", sccNumIter = " << sccNumIter;
618 errs() << "doesn't make any progress\n";
619 }
620 contNextScc = true;
621 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
622 sccNumBlk = sccRemainedNumBlk;
623 iterBlk = sccBeginIter;
624 contNextScc = false;
625 if (DEBUGME) {
626 errs() << "repeat processing SCC" << getSCCNum(curBlk)
627 << "sccNumIter = " << sccNumIter << "\n";
628 func.viewCFG();
629 //func.viewCFGOnly();
630 }
631 } else {
632 // Finish the current scc.
633 contNextScc = true;
634 }
635 } else {
636 // Continue on next component in the current scc.
637 contNextScc = false;
638 }
639
640 if (contNextScc) {
641 sccBeginBlk = NULL;
642 }
643 } //while, "one iteration" over the function.
644
645 BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
646 if (entryBlk->succ_size() == 0) {
647 finish = true;
648 if (DEBUGME) {
649 errs() << "Reduce to one block\n";
650 }
651 } else {
652 int newnumRemainedBlk
653 = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
654 // consider cloned blocks ??
655 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
656 makeProgress = true;
657 numRemainedBlk = newnumRemainedBlk;
658 } else {
659 makeProgress = false;
660 if (DEBUGME) {
661 errs() << "No progress\n";
662 }
663 }
664 }
665 } while (!finish && makeProgress);
666
667 // Misc wrap up to maintain the consistency of the Function representation.
668 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
669
670 // Detach retired Block, release memory.
671 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
672 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
673 if ((*iterMap).second && (*iterMap).second->isRetired) {
674 assert(((*iterMap).first)->getNumber() != -1);
675 if (DEBUGME) {
676 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
677 }
678 (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
679 }
680 delete (*iterMap).second;
681 }
682 blockInfoMap.clear();
683
684 // clear loopLandInfoMap
685 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
686 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
687 delete (*iterMap).second;
688 }
689 loopLandInfoMap.clear();
690
691 if (DEBUGME) {
692 func.viewCFG();
693 //func.dump();
694 }
695
696 if (!finish) {
697 assert(!"IRREDUCIBL_CF");
698 }
699
700 return true;
701 } //CFGStructurizer::run
702
703 /// Print the ordered Blocks.
704 ///
705 template
706 void CFGStructurizer::printOrderedBlocks(llvm::raw_ostream &os) {
707 size_t i = 0;
708 for (typename SmallVector::const_iterator
709 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
710 iterBlk != iterBlkEnd;
711 ++iterBlk, ++i) {
712 os << "BB" << (*iterBlk)->getNumber();
713 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
714 if (i != 0 && i % 10 == 0) {
715 os << "\n";
716 } else {
717 os << " ";
718 }
719 }
720 } //printOrderedBlocks
721
722 /// Compute the reversed DFS post order of Blocks
723 ///
724 template void CFGStructurizer::orderBlocks() {
725 int sccNum = 0;
726 BlockT *bb;
727 for (scc_iterator sccIter = scc_begin(funcRep),
728 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
729 std::vector &sccNext = *sccIter;
730 for (typename std::vector::const_iterator
731 blockIter = sccNext.begin(), blockEnd = sccNext.end();
732 blockIter != blockEnd; ++blockIter) {
733 bb = *blockIter;
734 orderedBlks.push_back(bb);
735 recordSccnum(bb, sccNum);
736 }
737 }
738
739 //walk through all the block in func to check for unreachable
740 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
741 blockEnd1 = FuncGTraits::nodes_end(funcRep);
742 blockIter1 != blockEnd1; ++blockIter1) {
743 BlockT *bb = &(*blockIter1);
744 sccNum = getSCCNum(bb);
745 if (sccNum == INVALIDSCCNUM) {
746 errs() << "unreachable block BB" << bb->getNumber() << "\n";
747 }
748 } //end of for
749 } //orderBlocks
750
751 template int CFGStructurizer::patternMatch(BlockT *curBlk) {
752 int numMatch = 0;
753 int curMatch;
754
755 if (DEBUGME) {
756 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
757 }
758
759 while ((curMatch = patternMatchGroup(curBlk)) > 0) {
760 numMatch += curMatch;
761 }
762
763 if (DEBUGME) {
764 errs() << "End patternMatch BB" << curBlk->getNumber()
765 << ", numMatch = " << numMatch << "\n";
766 }
767
768 return numMatch;
769 } //patternMatch
770
771 template
772 int CFGStructurizer::patternMatchGroup(BlockT *curBlk) {
773 int numMatch = 0;
774 numMatch += serialPatternMatch(curBlk);
775 numMatch += ifPatternMatch(curBlk);
776 //numMatch += switchPatternMatch(curBlk);
777 numMatch += loopendPatternMatch(curBlk);
778 numMatch += loopPatternMatch(curBlk);
779 return numMatch;
780 }//patternMatchGroup
781
782 template
783 int CFGStructurizer::serialPatternMatch(BlockT *curBlk) {
784 if (curBlk->succ_size() != 1) {
785 return 0;
786 }
787
788 BlockT *childBlk = *curBlk->succ_begin();
789 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
790 return 0;
791 }
792
793 mergeSerialBlock(curBlk, childBlk);
794 ++numSerialPatternMatch;
795 return 1;
796 } //serialPatternMatch
797
798 template
799 int CFGStructurizer::ifPatternMatch(BlockT *curBlk) {
800 //two edges
801 if (curBlk->succ_size() != 2) {
802 return 0;
803 }
804
805 if (hasBackEdge(curBlk)) {
806 return 0;
807 }
808
809 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
810 if (branchInstr == NULL) {
811 return 0;
812 }
813
814 assert(CFGTraits::isCondBranch(branchInstr));
815
816 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
817 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
818 BlockT *landBlk;
819 int cloned = 0;
820
821 // TODO: Simplify
822 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
823 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
824 landBlk = *trueBlk->succ_begin();
825 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
826 landBlk = NULL;
827 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
828 landBlk = falseBlk;
829 falseBlk = NULL;
830 } else if (falseBlk->succ_size() == 1
831 && *falseBlk->succ_begin() == trueBlk) {
832 landBlk = trueBlk;
833 trueBlk = NULL;
834 } else if (falseBlk->succ_size() == 1
835 && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
836 landBlk = *falseBlk->succ_begin();
837 } else if (trueBlk->succ_size() == 1
838 && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
839 landBlk = *trueBlk->succ_begin();
840 } else {
841 return handleJumpintoIf(curBlk, trueBlk, falseBlk);
842 }
843
844 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
845 // new BB created for landBlk==NULL may introduce new challenge to the
846 // reduction process.
847 if (landBlk != NULL &&
848 ((trueBlk && trueBlk->pred_size() > 1)
849 || (falseBlk && falseBlk->pred_size() > 1))) {
850 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
851 }
852
853 if (trueBlk && trueBlk->pred_size() > 1) {
854 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
855 ++cloned;
856 }
857
858 if (falseBlk && falseBlk->pred_size() > 1) {
859 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
860 ++cloned;
861 }
862
863 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
864
865 ++numIfPatternMatch;
866
867 numClonedBlock += cloned;
868
869 return 1 + cloned;
870 } //ifPatternMatch
871
872 template
873 int CFGStructurizer::switchPatternMatch(BlockT *curBlk) {
874 return 0;
875 } //switchPatternMatch
876
877 template
878 int CFGStructurizer::loopendPatternMatch(BlockT *curBlk) {
879 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
880 typename std::vector nestedLoops;
881 while (loopRep) {
882 nestedLoops.push_back(loopRep);
883 loopRep = loopRep->getParentLoop();
884 }
885
886 if (nestedLoops.size() == 0) {
887 return 0;
888 }
889
890 // Process nested loop outside->inside, so "continue" to a outside loop won't
891 // be mistaken as "break" of the current loop.
892 int num = 0;
893 for (typename std::vector::reverse_iterator
894 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
895 iter != iterEnd; ++iter) {
896 loopRep = *iter;
897
898 if (getLoopLandBlock(loopRep) != NULL) {
899 continue;
900 }
901
902 BlockT *loopHeader = loopRep->getHeader();
903
904 int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
905
906 if (numBreak == -1) {
907 break;
908 }
909
910 int numCont = loopcontPatternMatch(loopRep, loopHeader);
911 num += numBreak + numCont;
912 }
913
914 return num;
915 } //loopendPatternMatch
916
917 template
918 int CFGStructurizer::loopPatternMatch(BlockT *curBlk) {
919 if (curBlk->succ_size() != 0) {
920 return 0;
921 }
922
923 int numLoop = 0;
924 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
925 while (loopRep && loopRep->getHeader() == curBlk) {
926 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
927 if (loopLand) {
928 BlockT *landBlk = loopLand->landBlk;
929 assert(landBlk);
930 if (!isRetiredBlock(landBlk)) {
931 mergeLooplandBlock(curBlk, loopLand);
932 ++numLoop;
933 }
934 }
935 loopRep = loopRep->getParentLoop();
936 }
937
938 numLoopPatternMatch += numLoop;
939
940 return numLoop;
941 } //loopPatternMatch
942
943 template
944 int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep,
945 BlockT *loopHeader) {
946 BlockTSmallerVector exitingBlks;
947 loopRep->getExitingBlocks(exitingBlks);
948
949 if (DEBUGME) {
950 errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
951 }
952
953 if (exitingBlks.size() == 0) {
954 setLoopLandBlock(loopRep);
955 return 0;
956 }
957
958 // Compute the corresponding exitBlks and exit block set.
959 BlockTSmallerVector exitBlks;
960 std::set exitBlkSet;
961 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
962 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
963 BlockT *exitingBlk = *iter;
964 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
965 exitBlks.push_back(exitBlk);
966 exitBlkSet.insert(exitBlk); //non-duplicate insert
967 }
968
969 assert(exitBlkSet.size() > 0);
970 assert(exitBlks.size() == exitingBlks.size());
971
972 if (DEBUGME) {
973 errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
974 }
975
976 // Find exitLandBlk.
977 BlockT *exitLandBlk = NULL;
978 int numCloned = 0;
979 int numSerial = 0;
980
981 if (exitBlkSet.size() == 1)
982 {
983 exitLandBlk = *exitBlkSet.begin();
984 } else {
985 exitLandBlk = findNearestCommonPostDom(exitBlkSet);
986
987 if (exitLandBlk == NULL) {
988 return -1;
989 }
990
991 bool allInPath = true;
992 bool allNotInPath = true;
993 for (typename std::set::const_iterator
994 iter = exitBlkSet.begin(),
995 iterEnd = exitBlkSet.end();
996 iter != iterEnd; ++iter) {
997 BlockT *exitBlk = *iter;
998
999 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
1000 if (DEBUGME) {
1001 errs() << "BB" << exitBlk->getNumber()
1002 << " to BB" << exitLandBlk->getNumber() << " PathToKind="
1003 << pathKind << "\n";
1004 }
1005
1006 allInPath = allInPath && (pathKind == SinglePath_InPath);
1007 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
1008
1009 if (!allInPath && !allNotInPath) {
1010 if (DEBUGME) {
1011 errs() << "singlePath check fail\n";
1012 }
1013 return -1;
1014 }
1015 } // check all exit blocks
1016
1017 if (allNotInPath) {
1018 #if 1
1019
1020 // TODO: Simplify, maybe separate function?
1021 //funcRep->viewCFG();
1022 LoopT *parentLoopRep = loopRep->getParentLoop();
1023 BlockT *parentLoopHeader = NULL;
1024 if (parentLoopRep)
1025 parentLoopHeader = parentLoopRep->getHeader();
1026
1027 if (exitLandBlk == parentLoopHeader &&
1028 (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
1029 loopRep,
1030 exitBlkSet,
1031 exitLandBlk)) != NULL) {
1032 if (DEBUGME) {
1033 errs() << "relocateLoopcontBlock success\n";
1034 }
1035 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
1036 exitingBlks,
1037 exitBlks)) != NULL) {
1038 if (DEBUGME) {
1039 errs() << "insertEndbranchBlock success\n";
1040 }
1041 } else {
1042 if (DEBUGME) {
1043 errs() << "loop exit fail\n";
1044 }
1045 return -1;
1046 }
1047 #else
1048 return -1;
1049 #endif
1050 }
1051
1052 // Handle side entry to exit path.
1053 exitBlks.clear();
1054 exitBlkSet.clear();
1055 for (typename BlockTSmallerVector::iterator iterExiting =
1056 exitingBlks.begin(),
1057 iterExitingEnd = exitingBlks.end();
1058 iterExiting != iterExitingEnd; ++iterExiting) {
1059 BlockT *exitingBlk = *iterExiting;
1060 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
1061 BlockT *newExitBlk = exitBlk;
1062
1063 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
1064 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
1065 ++numCloned;
1066 }
1067
1068 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
1069
1070 exitBlks.push_back(newExitBlk);
1071 exitBlkSet.insert(newExitBlk);
1072 }
1073
1074 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1075 iterExitEnd = exitBlks.end();
1076 iterExit != iterExitEnd; ++iterExit) {
1077 BlockT *exitBlk = *iterExit;
1078 numSerial += serialPatternMatch(exitBlk);
1079 }
1080
1081 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1082 iterExitEnd = exitBlks.end();
1083 iterExit != iterExitEnd; ++iterExit) {
1084 BlockT *exitBlk = *iterExit;
1085 if (exitBlk->pred_size() > 1) {
1086 if (exitBlk != exitLandBlk) {
1087 return -1;
1088 }
1089 } else {
1090 if (exitBlk != exitLandBlk &&
1091 (exitBlk->succ_size() != 1 ||
1092 *exitBlk->succ_begin() != exitLandBlk)) {
1093 return -1;
1094 }
1095 }
1096 }
1097 } // else
1098
1099 // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
1100 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
1101
1102 // Fold break into the breaking block. Leverage across level breaks.
1103 assert(exitingBlks.size() == exitBlks.size());
1104 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
1105 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
1106 iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
1107 BlockT *exitBlk = *iterExit;
1108 BlockT *exitingBlk = *iterExiting;
1109 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
1110 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
1111 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
1112 }
1113
1114 int numBreak = static_cast(exitingBlks.size());
1115 numLoopbreakPatternMatch += numBreak;
1116 numClonedBlock += numCloned;
1117 return numBreak + numSerial + numCloned;
1118 } //loopbreakPatternMatch
1119
1120 template
1121 int CFGStructurizer::loopcontPatternMatch(LoopT *loopRep,
1122 BlockT *loopHeader) {
1123 int numCont = 0;
1124 SmallVector contBlk;
1125 for (typename InvBlockGTraits::ChildIteratorType iter =
1126 InvBlockGTraits::child_begin(loopHeader),
1127 iterEnd = InvBlockGTraits::child_end(loopHeader);
1128 iter != iterEnd; ++iter) {
1129 BlockT *curBlk = *iter;
1130 if (loopRep->contains(curBlk)) {
1131 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
1132 loopHeader, loopRep);
1133 contBlk.push_back(curBlk);
1134 ++numCont;
1135 }
1136 }
1137
1138 for (typename SmallVector::iterator
1139 iter = contBlk.begin(), iterEnd = contBlk.end();
1140 iter != iterEnd; ++iter) {
1141 (*iter)->removeSuccessor(loopHeader);
1142 }
1143
1144 numLoopcontPatternMatch += numCont;
1145
1146 return numCont;
1147 } //loopcontPatternMatch
1148
1149
1150 template
1151 bool CFGStructurizer::isSameloopDetachedContbreak(BlockT *src1Blk,
1152 BlockT *src2Blk) {
1153 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1154 // same loop with LoopLandInfo without explicitly keeping track of
1155 // loopContBlks and loopBreakBlks, this is a method to get the information.
1156 //
1157 if (src1Blk->succ_size() == 0) {
1158 LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1159 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1160 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1161 if (theEntry != NULL) {
1162 if (DEBUGME) {
1163 errs() << "isLoopContBreakBlock yes src1 = BB"
1164 << src1Blk->getNumber()
1165 << " src2 = BB" << src2Blk->getNumber() << "\n";
1166 }
1167 return true;
1168 }
1169 }
1170 }
1171 return false;
1172 } //isSameloopDetachedContbreak
1173
1174 template
1175 int CFGStructurizer::handleJumpintoIf(BlockT *headBlk,
1176 BlockT *trueBlk,
1177 BlockT *falseBlk) {
1178 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1179 if (num == 0) {
1180 if (DEBUGME) {
1181 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1182 }
1183 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1184 }
1185 return num;
1186 }
1187
1188 template
1189 int CFGStructurizer::handleJumpintoIfImp(BlockT *headBlk,
1190 BlockT *trueBlk,
1191 BlockT *falseBlk) {
1192 int num = 0;
1193 BlockT *downBlk;
1194
1195 //trueBlk could be the common post dominator
1196 downBlk = trueBlk;
1197
1198 if (DEBUGME) {
1199 errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1200 << " true = BB" << trueBlk->getNumber()
1201 << ", numSucc=" << trueBlk->succ_size()
1202 << " false = BB" << falseBlk->getNumber() << "\n";
1203 }
1204
1205 while (downBlk) {
1206 if (DEBUGME) {
1207 errs() << "check down = BB" << downBlk->getNumber();
1208 }
1209
1210 if (//postDomTree->dominates(downBlk, falseBlk) &&
1211 singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1212 if (DEBUGME) {
1213 errs() << " working\n";
1214 }
1215
1216 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1217 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1218
1219 numClonedBlock += num;
1220 num += serialPatternMatch(*headBlk->succ_begin());
1221 num += serialPatternMatch(*(++headBlk->succ_begin()));
1222 num += ifPatternMatch(headBlk);
1223 assert(num > 0); //
1224
1225 break;
1226 }
1227 if (DEBUGME) {
1228 errs() << " not working\n";
1229 }
1230 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1231 } // walk down the postDomTree
1232
1233 return num;
1234 } //handleJumpintoIf
1235
1236 template
1237 void CFGStructurizer::showImproveSimpleJumpintoIf(BlockT *headBlk,
1238 BlockT *trueBlk,
1239 BlockT *falseBlk,
1240 BlockT *landBlk,
1241 bool detail) {
1242 errs() << "head = BB" << headBlk->getNumber()
1243 << " size = " << headBlk->size();
1244 if (detail) {
1245 errs() << "\n";
1246 headBlk->print(errs());
1247 errs() << "\n";
1248 }
1249
1250 if (trueBlk) {
1251 errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1252 << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1253 if (detail) {
1254 errs() << "\n";
1255 trueBlk->print(errs());
1256 errs() << "\n";
1257 }
1258 }
1259 if (falseBlk) {
1260 errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1261 << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1262 if (detail) {
1263 errs() << "\n";
1264 falseBlk->print(errs());
1265 errs() << "\n";
1266 }
1267 }
1268 if (landBlk) {
1269 errs() << ", land = BB" << landBlk->getNumber() << " size = "
1270 << landBlk->size() << " numPred = " << landBlk->pred_size();
1271 if (detail) {
1272 errs() << "\n";
1273 landBlk->print(errs());
1274 errs() << "\n";
1275 }
1276 }
1277
1278 errs() << "\n";
1279 } //showImproveSimpleJumpintoIf
1280
1281 template
1282 int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk,
1283 BlockT *trueBlk,
1284 BlockT *falseBlk,
1285 BlockT **plandBlk) {
1286 bool migrateTrue = false;
1287 bool migrateFalse = false;
1288
1289 BlockT *landBlk = *plandBlk;
1290
1291 assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1292 && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1293
1294 if (trueBlk == falseBlk) {
1295 return 0;
1296 }
1297
1298 #if 0
1299 if (DEBUGME) {
1300 errs() << "improveSimpleJumpintoIf: ";
1301 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1302 }
1303 #endif
1304
1305 // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
1306 // May consider the # landBlk->pred_size() as it represents the number of
1307 // assignment initReg = .. needed to insert.
1308 migrateTrue = needMigrateBlock(trueBlk);
1309 migrateFalse = needMigrateBlock(falseBlk);
1310
1311 if (!migrateTrue && !migrateFalse) {
1312 return 0;
1313 }
1314
1315 // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1316 // have more than one predecessors. without doing this, its predecessor
1317 // rather than headBlk will have undefined value in initReg.
1318 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1319 migrateTrue = true;
1320 }
1321 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1322 migrateFalse = true;
1323 }
1324
1325 if (DEBUGME) {
1326 errs() << "before improveSimpleJumpintoIf: ";
1327 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1328 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1329 }
1330
1331 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1332 //
1333 // new: headBlk => if () {initReg = 1; org trueBlk branch} else