llvm.org GIT mirror llvm / e50e6f3
[ARM] Extract shifts out of multiply-by-constant Turning (op x (mul y k)) into (op x (lsl (mul y k>>n) n)) is beneficial when we can do the lsl as a shifted operand and the resulting multiply constant is simpler to generate. Do this by doing the transformation when trying to select a shifted operand, as that ensures that it actually turns out better (the alternative would be to do it in PreprocessISelDAG, but we don't know for sure there if extracting the shift would allow a shifted operand to be used). Differential Revision: http://reviews.llvm.org/D12196 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247569 91177308-0d34-0410-b5e6-96231b3b80d8 John Brawn 4 years ago
2 changed file(s) with 293 addition(s) and 50 deletion(s). Raw diff Collapse all Expand all
270270 // Get the alignment operand for a NEON VLD or VST instruction.
271271 SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
272272 bool is64BitVector);
273
274 /// Returns the number of instructions required to materialize the given
275 /// constant in a register, or 3 if a literal pool load is needed.
276 unsigned ConstantMaterializationCost(unsigned Val) const;
277
278 /// Checks if N is a multiplication by a constant where we can extract out a
279 /// power of two from the constant so that it can be used in a shift, but only
280 /// if it simplifies the materialization of the constant. Returns true if it
281 /// is, and assigns to PowerOfTwo the power of two that should be extracted
282 /// out and to NewMulConst the new constant to be multiplied by.
283 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
284 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
285
286 /// Replace N with M in CurDAG, in a way that also ensures that M gets
287 /// selected when N would have been selected.
288 void replaceDAGValue(const SDValue &N, SDValue M);
273289 };
274290 }
275291
463479 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
464480 }
465481
482 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
483 if (Subtarget->isThumb()) {
484 if (Val <= 255) return 1; // MOV
485 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
486 if (~Val <= 255) return 2; // MOV + MVN
487 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
488 } else {
489 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
490 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
491 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
492 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
493 }
494 if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
495 return 3; // Literal pool load
496 }
497
498 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
499 unsigned MaxShift,
500 unsigned &PowerOfTwo,
501 SDValue &NewMulConst) const {
502 assert(N.getOpcode() == ISD::MUL);
503 assert(MaxShift > 0);
504
505 // If the multiply is used in more than one place then changing the constant
506 // will make other uses incorrect, so don't.
507 if (!N.hasOneUse()) return false;
508 // Check if the multiply is by a constant
509 ConstantSDNode *MulConst = dyn_cast(N.getOperand(1));
510 if (!MulConst) return false;
511 // If the constant is used in more than one place then modifying it will mean
512 // we need to materialize two constants instead of one, which is a bad idea.
513 if (!MulConst->hasOneUse()) return false;
514 unsigned MulConstVal = MulConst->getZExtValue();
515 if (MulConstVal == 0) return false;
516
517 // Find the largest power of 2 that MulConstVal is a multiple of
518 PowerOfTwo = MaxShift;
519 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
520 --PowerOfTwo;
521 if (PowerOfTwo == 0) return false;
522 }
523
524 // Only optimise if the new cost is better
525 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
526 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
527 unsigned OldCost = ConstantMaterializationCost(MulConstVal);
528 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
529 return NewCost < OldCost;
530 }
531
532 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
533 CurDAG->RepositionNode(N.getNode(), M.getNode());
534 CurDAG->ReplaceAllUsesWith(N, M);
535 }
536
466537 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
467538 SDValue &BaseReg,
468539 SDValue &Opc,
469540 bool CheckProfitability) {
470541 if (DisableShifterOp)
471542 return false;
543
544 // If N is a multiply-by-constant and it's profitable to extract a shift and
545 // use it in a shifted operand do so.
546 if (N.getOpcode() == ISD::MUL) {
547 unsigned PowerOfTwo = 0;
548 SDValue NewMulConst;
549 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
550 replaceDAGValue(N.getOperand(1), NewMulConst);
551 BaseReg = N;
552 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
553 PowerOfTwo),
554 SDLoc(N), MVT::i32);
555 return true;
556 }
557 }
472558
473559 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
474560
651737 } else {
652738 ShOpcVal = ARM_AM::no_shift;
653739 }
740 }
741 }
742
743 // If Offset is a multiply-by-constant and it's profitable to extract a shift
744 // and use it in a shifted operand do so.
745 if (Offset.getOpcode() == ISD::MUL) {
746 unsigned PowerOfTwo = 0;
747 SDValue NewMulConst;
748 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
749 replaceDAGValue(Offset.getOperand(1), NewMulConst);
750 ShAmt = PowerOfTwo;
751 ShOpcVal = ARM_AM::lsl;
654752 }
655753 }
656754
13101408 else {
13111409 ShAmt = 0;
13121410 }
1411 }
1412 }
1413
1414 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1415 // and use it in a shifted operand do so.
1416 if (OffReg.getOpcode() == ISD::MUL) {
1417 unsigned PowerOfTwo = 0;
1418 SDValue NewMulConst;
1419 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1420 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1421 ShAmt = PowerOfTwo;
13131422 }
13141423 }
13151424
23912500 }
23922501 case ISD::Constant: {
23932502 unsigned Val = cast(N)->getZExtValue();
2394 bool UseCP = true;
2395 if (Subtarget->useMovt(*MF))
2396 // Thumb2-aware targets have the MOVT instruction, so all immediates can
2397 // be done with MOV + MOVT, at worst.
2398 UseCP = false;
2399 else {
2400 if (Subtarget->isThumb()) {
2401 UseCP = (Val > 255 && // MOV
2402 ~Val > 255 && // MOV + MVN
2403 !ARM_AM::isThumbImmShiftedVal(Val) && // MOV + LSL
2404 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2405 } else
2406 UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
2407 ARM_AM::getSOImmVal(~Val) == -1 && // MVN
2408 !ARM_AM::isSOImmTwoPartVal(Val) && // two instrs.
2409 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2410 }
2411
2412 if (UseCP) {
2503 // If we can't materialize the constant we need to use a literal pool
2504 if (ConstantMaterializationCost(Val) > 2) {
24132505 SDValue CPIdx = CurDAG->getTargetConstantPool(
24142506 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
24152507 TLI->getPointerTy(CurDAG->getDataLayout()));
None ; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
1 ; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
0 ; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-ARM
1 ; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-ARM
2 ; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMB
23 ; rdar://8576755
34
45
56 define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
6 ; A8-LABEL: test1:
7 ; A8: add r0, r0, r1, lsl r2
8
9 ; A9-LABEL: test1:
10 ; A9: add r0, r0, r1, lsl r2
7 ; CHECK-LABEL: test1:
8 ; CHECK-ARM: add r0, r0, r1, lsl r2
9 ; CHECK-THUMB: lsls r1, r2
10 ; CHECK-THUMB: add r0, r1
1111 %shift.upgrd.1 = zext i8 %sh to i32
1212 %A = shl i32 %Y, %shift.upgrd.1
1313 %B = add i32 %X, %A
1515 }
1616
1717 define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
18 ; A8-LABEL: test2:
19 ; A8: bic r0, r0, r1, asr r2
20
21 ; A9-LABEL: test2:
22 ; A9: bic r0, r0, r1, asr r2
18 ; CHECK-LABEL: test2:
19 ; CHECK-ARM: bic r0, r0, r1, asr r2
20 ; CHECK-THUMB: asrs r1, r2
21 ; CHECK-THUMB: bics r0, r1
2322 %shift.upgrd.2 = zext i8 %sh to i32
2423 %A = ashr i32 %Y, %shift.upgrd.2
2524 %B = xor i32 %A, -1
2928
3029 define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
3130 entry:
32 ; A8-LABEL: test3:
33 ; A8: ldr r0, [r0, r2, lsl #2]
34 ; A8: ldr r1, [r1, r2, lsl #2]
35
36 ; lsl #2 is free
37 ; A9-LABEL: test3:
38 ; A9: ldr r0, [r0, r2, lsl #2]
39 ; A9: ldr r1, [r1, r2, lsl #2]
31 ; CHECK-LABEL: test3:
32 ; CHECK: ldr{{(.w)?}} r0, [r0, r2, lsl #2]
33 ; CHECK: ldr{{(.w)?}} r1, [r1, r2, lsl #2]
4034 %tmp1 = shl i32 %offset, 2
4135 %tmp2 = add i32 %base, %tmp1
4236 %tmp3 = inttoptr i32 %tmp2 to i32*
5246
5347 define fastcc void @test4(i16 %addr) nounwind {
5448 entry:
55 ; A8-LABEL: test4:
56 ; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
57 ; A8-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
58 ; A8: str [[REG]], [r0, r1, lsl #2]
59 ; A8-NOT: str [[REG]], [r0]
60
61 ; A9-LABEL: test4:
62 ; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
63 ; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
64 ; A9: str [[REG]], [r0, r1, lsl #2]
65 ; A9-NOT: str [[REG]], [r0]
49 ; CHECK-LABEL: test4:
50 ; CHECK: ldr{{(.w)?}} [[REG:r[0-9]+]], [r0, r1, lsl #2]
51 ; CHECK-NOT: ldr{{(.w)?}} [[REG:r[0-9]+]], [r0, r1, lsl #2]!
52 ; CHECK: str{{(.w)?}} [[REG]], [r0, r1, lsl #2]
53 ; CHECK-NOT: str{{(.w)?}} [[REG]], [r0]
6654 %0 = tail call i8* (...) @malloc(i32 undef) nounwind
6755 %1 = bitcast i8* %0 to i32*
6856 %2 = sext i16 %addr to i32
7260 store i32 %5, i32* %3, align 4
7361 ret void
7462 }
63
64 define i32 @test_orr_extract_from_mul_1(i32 %x, i32 %y) {
65 entry:
66 ; CHECK-LABEL: test_orr_extract_from_mul_1
67 ; CHECK: movw r2, #63767
68 ; CHECK-ARM: mul r1, r1, r2
69 ; CHECK-ARM: orr r0, r1, r0
70 ; CHECK-THUMB: muls r1, r2, r1
71 ; CHECK-THUMB: orrs r0, r1
72 %mul = mul i32 %y, 63767
73 %or = or i32 %mul, %x
74 ret i32 %or
75 }
76
77 define i32 @test_orr_extract_from_mul_2(i32 %x, i32 %y) {
78 ; CHECK-LABEL: test_orr_extract_from_mul_2
79 ; CHECK: movw r2, #63767
80 ; CHECK-ARM: mul r1, r1, r2
81 ; CHECK-THUMB: muls r1, r2, r1
82 ; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #1
83 entry:
84 %mul1 = mul i32 %y, 127534
85 %or = or i32 %mul1, %x
86 ret i32 %or
87 }
88
89 define i32 @test_orr_extract_from_mul_3(i32 %x, i32 %y) {
90 ; CHECK-LABEL: test_orr_extract_from_mul_3
91 ; CHECK: movw r2, #63767
92 ; CHECK-ARM: mul r1, r1, r2
93 ; CHECK-THUMB: muls r1, r2, r1
94 ; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #2
95 entry:
96 %mul1 = mul i32 %y, 255068
97 %or = or i32 %mul1, %x
98 ret i32 %or
99 }
100
101 define i32 @test_orr_extract_from_mul_4(i32 %x, i32 %y) {
102 ; CHECK-LABEL: test_orr_extract_from_mul_4
103 ; CHECK: movw r2, #63767
104 ; CHECK-ARM: mul r1, r1, r2
105 ; CHECK-THUMB: muls r1, r2, r1
106 ; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #3
107 entry:
108 %mul1 = mul i32 %y, 510136
109 %or = or i32 %mul1, %x
110 ret i32 %or
111 }
112
113 define i32 @test_orr_extract_from_mul_5(i32 %x, i32 %y) {
114 ; CHECK-LABEL: test_orr_extract_from_mul_5
115 ; CHECK: movw r2, #63767
116 ; CHECK-ARM: mul r1, r1, r2
117 ; CHECK-THUMB: muls r1, r2, r1
118 ; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #4
119 entry:
120 %mul1 = mul i32 %y, 1020272
121 %or = or i32 %mul1, %x
122 ret i32 %or
123 }
124
125 define i32 @test_orr_extract_from_mul_6(i32 %x, i32 %y) {
126 ; CHECK-LABEL: test_orr_extract_from_mul_6
127 ; CHECK: movw r2, #63767
128 ; CHECK-ARM: mul r1, r1, r2
129 ; CHECK-THUMB: muls r1, r2, r1
130 ; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #16
131 entry:
132 %mul = mul i32 %y, -115933184
133 %or = or i32 %mul, %x
134 ret i32 %or
135 }
136
137 define i32 @test_load_extract_from_mul_1(i8* %x, i32 %y) {
138 ; CHECK-LABEL: test_load_extract_from_mul_1
139 ; CHECK: movw r2, #63767
140 ; CHECK-ARM: mul r1, r1, r2
141 ; CHECK-THUMB: muls r1, r2, r1
142 ; CHECK: ldrb r0, [r0, r1]
143 entry:
144 %mul = mul i32 %y, 63767
145 %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul
146 %0 = load i8, i8* %arrayidx, align 1
147 %conv = zext i8 %0 to i32
148 ret i32 %conv
149 }
150
151 define i32 @test_load_extract_from_mul_2(i8* %x, i32 %y) {
152 ; CHECK-LABEL: test_load_extract_from_mul_2
153 ; CHECK: movw r2, #63767
154 ; CHECK-ARM: mul r1, r1, r2
155 ; CHECK-THUMB: muls r1, r2, r1
156 ; CHECK: ldrb{{(.w)?}} r0, [r0, r1, lsl #1]
157 entry:
158 %mul1 = mul i32 %y, 127534
159 %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
160 %0 = load i8, i8* %arrayidx, align 1
161 %conv = zext i8 %0 to i32
162 ret i32 %conv
163 }
164
165 define i32 @test_load_extract_from_mul_3(i8* %x, i32 %y) {
166 ; CHECK-LABEL: test_load_extract_from_mul_3
167 ; CHECK: movw r2, #63767
168 ; CHECK-ARM: mul r1, r1, r2
169 ; CHECK-THUMB: muls r1, r2, r1
170 ; CHECK: ldrb{{(.w)?}} r0, [r0, r1, lsl #2]
171 entry:
172 %mul1 = mul i32 %y, 255068
173 %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
174 %0 = load i8, i8* %arrayidx, align 1
175 %conv = zext i8 %0 to i32
176 ret i32 %conv
177 }
178
179 define i32 @test_load_extract_from_mul_4(i8* %x, i32 %y) {
180 ; CHECK-LABEL: test_load_extract_from_mul_4
181 ; CHECK: movw r2, #63767
182 ; CHECK-ARM: mul r1, r1, r2
183 ; CHECK-THUMB: muls r1, r2, r1
184 ; CHECK: ldrb{{(.w)?}} r0, [r0, r1, lsl #3]
185 entry:
186 %mul1 = mul i32 %y, 510136
187 %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
188 %0 = load i8, i8* %arrayidx, align 1
189 %conv = zext i8 %0 to i32
190 ret i32 %conv
191 }
192
193 define i32 @test_load_extract_from_mul_5(i8* %x, i32 %y) {
194 ; CHECK-LABEL: test_load_extract_from_mul_5
195 ; CHECK-ARM: movw r2, #63767
196 ; CHECK-ARM: mul r1, r1, r2
197 ; CHECK-ARM: ldrb r0, [r0, r1, lsl #4]
198 ; CHECK-THUMB: movw r2, #37232
199 ; CHECK-THUMB: movt r2, #15
200 ; CHECK-THUMB: muls r1, r2, r1
201 ; CHECK-THUMB: ldrb r0, [r0, r1]
202 entry:
203 %mul1 = mul i32 %y, 1020272
204 %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
205 %0 = load i8, i8* %arrayidx, align 1
206 %conv = zext i8 %0 to i32
207 ret i32 %conv
208 }
209
210 define i32 @test_load_extract_from_mul_6(i8* %x, i32 %y) {
211 ; CHECK-LABEL: test_load_extract_from_mul_6
212 ; CHECK-ARM: movw r2, #63767
213 ; CHECK-ARM: mul r1, r1, r2
214 ; CHECK-ARM: ldrb r0, [r0, r1, lsl #16]
215 ; CHECK-THUMB: movs r2, #0
216 ; CHECK-THUMB: movt r2, #63767
217 ; CHECK-THUMB: muls r1, r2, r1
218 ; CHECK-THUMB: ldrb r0, [r0, r1]
219 entry:
220 %mul = mul i32 %y, -115933184
221 %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul
222 %0 = load i8, i8* %arrayidx, align 1
223 %conv = zext i8 %0 to i32
224 ret i32 %conv
225 }