llvm.org GIT mirror llvm / eece9dc
Revert r155136 "Defer some shl transforms to DAGCombine." While the patch was perfect and defect free, it exposed a really nasty bug in X86 SelectionDAG that caused an llc crash when compiling lencod. I'll put the patch back in after fixing the SelectionDAG problem. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155181 91177308-0d34-0410-b5e6-96231b3b80d8 Jakob Stoklund Olesen 7 years ago
7 changed file(s) with 58 addition(s) and 202 deletion(s). Raw diff Collapse all Expand all
528528 ShiftOp = 0;
529529
530530 if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
531
532 // This is a constant shift of a constant shift. Be careful about hiding
533 // shl instructions behind bit masks. They are used to represent multiplies
534 // by a constant, and it is important that simple arithmetic expressions
535 // are still recognizable by scalar evolution.
536 //
537 // The transforms applied to shl are very similar to the transforms applied
538 // to mul by constant. We can be more aggressive about optimizing right
539 // shifts.
540 //
541 // Combinations of right and left shifts will still be optimized in
542 // DAGCombine where scalar evolution no longer applies.
543
544531 ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
545532 uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
546533 uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
566553 }
567554
568555 if (ShiftAmt1 == ShiftAmt2) {
556 // If we have ((X >>? C) << C), turn this into X & (-1 << C).
557 if (I.getOpcode() == Instruction::Shl &&
558 ShiftOp->getOpcode() != Instruction::Shl) {
559 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
560 return BinaryOperator::CreateAnd(X,
561 ConstantInt::get(I.getContext(),Mask));
562 }
569563 // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
570564 if (I.getOpcode() == Instruction::LShr &&
571565 ShiftOp->getOpcode() == Instruction::Shl) {
575569 }
576570 } else if (ShiftAmt1 < ShiftAmt2) {
577571 uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
578
579 // (X >>?,exact C1) << C2 --> X << (C2-C1)
580 // The inexact version is deferred to DAGCombine so we don't hide shl
581 // behind a bit mask.
572
573 // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
582574 if (I.getOpcode() == Instruction::Shl &&
583 ShiftOp->getOpcode() != Instruction::Shl &&
584 ShiftOp->isExact()) {
575 ShiftOp->getOpcode() != Instruction::Shl) {
585576 assert(ShiftOp->getOpcode() == Instruction::LShr ||
586577 ShiftOp->getOpcode() == Instruction::AShr);
587578 ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
588 BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
589 X, ShiftDiffCst);
590 NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
591 NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
592 return NewShl;
593 }
594
579 if (ShiftOp->isExact()) {
580 // (X >>?,exact C1) << C2 --> X << (C2-C1)
581 BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
582 X, ShiftDiffCst);
583 NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
584 NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
585 return NewShl;
586 }
587 Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
588
589 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
590 return BinaryOperator::CreateAnd(Shift,
591 ConstantInt::get(I.getContext(),Mask));
592 }
593
595594 // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
596595 if (I.getOpcode() == Instruction::LShr &&
597596 ShiftOp->getOpcode() == Instruction::Shl) {
627626 assert(ShiftAmt2 < ShiftAmt1);
628627 uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
629628
630 // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
631 // The inexact version is deferred to DAGCombine so we don't hide shl
632 // behind a bit mask.
629 // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
633630 if (I.getOpcode() == Instruction::Shl &&
634 ShiftOp->getOpcode() != Instruction::Shl &&
635 ShiftOp->isExact()) {
631 ShiftOp->getOpcode() != Instruction::Shl) {
636632 ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
637 BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
638 X, ShiftDiffCst);
639 NewShr->setIsExact(true);
640 return NewShr;
641 }
642
633 if (ShiftOp->isExact()) {
634 // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
635 BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
636 X, ShiftDiffCst);
637 NewShr->setIsExact(true);
638 return NewShr;
639 }
640 Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(),
641 X, ShiftDiffCst);
642 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
643 return BinaryOperator::CreateAnd(Shift,
644 ConstantInt::get(I.getContext(),Mask));
645 }
646
643647 // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
644648 if (I.getOpcode() == Instruction::LShr &&
645649 ShiftOp->getOpcode() == Instruction::Shl) {
44 define i32 @main(i32 %argc) nounwind ssp {
55 entry:
66 %tmp3151 = trunc i32 %argc to i8
7 ; CHECK: %tmp3163 = shl i8 %tmp3162, 6
8 ; CHECK: and i8 %tmp3163, 64
7 ; CHECK: %tmp3162 = shl i8 %tmp3151, 5
8 ; CHECK: and i8 %tmp3162, 64
99 ; CHECK-NOT: shl
1010 ; CHECK-NOT: shr
1111 %tmp3161 = or i8 %tmp3151, -17
3737 %tmp10 = lshr i8 %tmp8, 7
3838 %tmp11 = shl i8 %tmp10, 5
3939
40 ; CHECK: %tmp10 = lshr i8 %tmp8, 7
41 ; CHECK: %tmp11 = shl nuw nsw i8 %tmp10, 5
40 ; CHECK: %0 = lshr i8 %tmp8, 2
41 ; CHECK: %tmp11 = and i8 %0, 32
4242
4343 %tmp12 = xor i8 %tmp11, %tmp9
4444 ret i8 %tmp12
4646 }
4747
4848 ; CHECK: @test6
49 ; CHECK: mul i55 %A, 6
49 ; CHECK-NOT: sh
5050 define i55 @test6(i55 %A) {
5151 %B = shl i55 %A, 1 ; [#uses=1]
5252 %C = mul i55 %B, 3 ; [#uses=1]
5353 ret i55 %C
5454 }
5555
56 ; CHECK: @test6a
57 ; CHECK: mul i55 %A, 6
58 define i55 @test6a(i55 %A) {
59 %B = mul i55 %A, 3 ; [#uses=1]
60 %C = shl i55 %B, 1 ; [#uses=1]
61 ret i55 %C
62 }
63
6456 ; CHECK: @test7
6557 ; CHECK-NOT: sh
6658 define i29 @test7(i8 %X) {
9486 }
9587
9688 ; CHECK: @test11
97 ; Don't hide the shl from scalar evolution. DAGCombine will get it.
98 ; CHECK: shl
89 ; CHECK-NOT: sh
9990 define i23 @test11(i23 %A) {
10091 %a = mul i23 %A, 3 ; [#uses=1]
10192 %B = lshr i23 %a, 11 ; [#uses=1]
112103 }
113104
114105 ; CHECK: @test13
115 ; Don't hide the shl from scalar evolution. DAGCombine will get it.
116 ; CHECK: shl
106 ; CHECK-NOT: sh
117107 define i18 @test13(i18 %A) {
118108 %a = mul i18 %A, 3 ; [#uses=1]
119109 %B = ashr i18 %a, 8 ; [#uses=1]
456456 %E = sext i32 %D to i64
457457 ret i64 %E
458458 ; CHECK: @test50
459 ; lshr+shl will be handled by DAGCombine.
460 ; CHECK-NEXT: lshr i64 %A, 2
461 ; CHECK-NEXT: shl i64 %a, 32
459 ; CHECK-NEXT: shl i64 %A, 30
462460 ; CHECK-NEXT: add i64 {{.*}}, -4294967296
463 ; CHECK-NEXT: %E = ashr exact i64 {{.*}}, 32
464 ; CHECK-NEXT: ret i64 %E
461 ; CHECK-NEXT: %sext = ashr i64 {{.*}}, 32
462 ; CHECK-NEXT: ret i64 %sext
465463 }
466464
467465 define i64 @test51(i64 %A, i1 %cond) {
6464 ; CHECK: @test6
6565 ; CHECK-NEXT: mul i32 %A, 6
6666 ; CHECK-NEXT: ret i32
67 %B = shl i32 %A, 1 ;; convert to an mul instruction
68 %C = mul i32 %B, 3
69 ret i32 %C
70 }
71
72 define i32 @test6a(i32 %A) {
73 ; CHECK: @test6a
74 ; CHECK-NEXT: mul i32 %A, 6
75 ; CHECK-NEXT: ret i32
76 %B = mul i32 %A, 3
77 %C = shl i32 %B, 1 ;; convert to an mul instruction
67 %B = shl i32 %A, 1 ;; convert to an mul instruction
68 %C = mul i32 %B, 3
7869 ret i32 %C
7970 }
8071
10596 ret i8 %C
10697 }
10798
108 ;; This transformation is deferred to DAGCombine:
10999 ;; (A >> 7) << 7 === A & 128
110 ;; The shl may be valuable to scalar evolution.
111100 define i8 @test10(i8 %A) {
112101 ; CHECK: @test10
113102 ; CHECK-NEXT: and i8 %A, -128
117106 ret i8 %C
118107 }
119108
120 ;; Allow the simplification when the lshr shift is exact.
121 define i8 @test10a(i8 %A) {
122 ; CHECK: @test10a
123 ; CHECK-NEXT: ret i8 %A
124 %B = lshr exact i8 %A, 7
125 %C = shl i8 %B, 7
126 ret i8 %C
127 }
128
129 ;; This transformation is deferred to DAGCombine:
130109 ;; (A >> 3) << 4 === (A & 0x1F) << 1
131 ;; The shl may be valuable to scalar evolution.
132110 define i8 @test11(i8 %A) {
133111 ; CHECK: @test11
134 ; CHECK: shl i8
112 ; CHECK-NEXT: mul i8 %A, 6
113 ; CHECK-NEXT: and i8
135114 ; CHECK-NEXT: ret i8
136115 %a = mul i8 %A, 3 ; [#uses=1]
137116 %B = lshr i8 %a, 3 ; [#uses=1]
139118 ret i8 %C
140119 }
141120
142 ;; Allow the simplification in InstCombine when the lshr shift is exact.
143 define i8 @test11a(i8 %A) {
144 ; CHECK: @test11a
145 ; CHECK-NEXT: mul i8 %A, 6
146 ; CHECK-NEXT: ret i8
147 %a = mul i8 %A, 3
148 %B = lshr exact i8 %a, 3
149 %C = shl i8 %B, 4
150 ret i8 %C
151 }
152
153 ;; This is deferred to DAGCombine unless %B is single-use.
154121 ;; (A >> 8) << 8 === A & -256
155122 define i32 @test12(i32 %A) {
156123 ; CHECK: @test12
161128 ret i32 %C
162129 }
163130
164 ;; This transformation is deferred to DAGCombine:
165131 ;; (A >> 3) << 4 === (A & -8) * 2
166 ;; The shl may be valuable to scalar evolution.
167132 define i8 @test13(i8 %A) {
168133 ; CHECK: @test13
169 ; CHECK: shl i8
134 ; CHECK-NEXT: mul i8 %A, 6
135 ; CHECK-NEXT: and i8
170136 ; CHECK-NEXT: ret i8
171137 %a = mul i8 %A, 3 ; [#uses=1]
172138 %B = ashr i8 %a, 3 ; [#uses=1]
173139 %C = shl i8 %B, 4 ; [#uses=1]
174 ret i8 %C
175 }
176
177 define i8 @test13a(i8 %A) {
178 ; CHECK: @test13a
179 ; CHECK-NEXT: mul i8 %A, 6
180 ; CHECK-NEXT: ret i8
181 %a = mul i8 %A, 3
182 %B = ashr exact i8 %a, 3
183 %C = shl i8 %B, 4
184140 ret i8 %C
185141 }
186142
520476 %tmp49 = lshr i8 %tmp48, 5
521477 %tmp50 = mul i8 %tmp49, 64
522478 %tmp51 = xor i8 %tmp50, %tmp5
479 ; CHECK: and i8 %0, 16
523480 %tmp52 = and i8 %tmp51, -128
524481 %tmp53 = lshr i8 %tmp52, 7
525 ; CHECK: lshr i8 %tmp51, 7
526482 %tmp54 = mul i8 %tmp53, 16
527 ; CHECK: shl nuw nsw i8 %tmp53, 4
528483 %tmp55 = xor i8 %tmp54, %tmp51
529484 ; CHECK: ret i8 %tmp551
530485 ret i8 %tmp55
2121 ; CHECK: @test1
2222 ; CHECK-NEXT: ret void
2323 }
24
25 ; This function exposes a phase ordering problem when InstCombine is
26 ; turning %add into a bitmask, making it difficult to spot a 0 return value.
27 ;
28 ; It is also important that %add is expressed as a multiple of %div so scalar
29 ; evolution can recognize it.
30 define i32 @test2(i32 %a, i32* %p) nounwind uwtable ssp {
31 entry:
32 %div = udiv i32 %a, 4
33 %arrayidx = getelementptr inbounds i32* %p, i64 0
34 store i32 %div, i32* %arrayidx, align 4
35 %add = add i32 %div, %div
36 %arrayidx1 = getelementptr inbounds i32* %p, i64 1
37 store i32 %add, i32* %arrayidx1, align 4
38 %arrayidx2 = getelementptr inbounds i32* %p, i64 1
39 %0 = load i32* %arrayidx2, align 4
40 %arrayidx3 = getelementptr inbounds i32* %p, i64 0
41 %1 = load i32* %arrayidx3, align 4
42 %mul = mul i32 2, %1
43 %sub = sub i32 %0, %mul
44 ret i32 %sub
45
46 ; CHECK: @test2
47 ; CHECK: %div = lshr i32 %a, 2
48 ; CHECK: %add = shl nuw nsw i32 %div, 1
49 ; CHECK: ret i32 0
50 }
+0
-64
test/Transforms/PhaseOrdering/scev.ll less more
None ; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
1 ;
2 ; This file contains phase ordering tests for scalar evolution.
3 ; Test that the standard passes don't obfuscate the IR so scalar evolution can't
4 ; recognize expressions.
5
6 ; CHECK: test1
7 ; The loop body contains two increments by %div.
8 ; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d.
9 ; CHECK: --> {%p,+,(2 * (%d /u 4) * sizeof(i32))}
10 define void @test1(i64 %d, i32* %p) nounwind uwtable ssp {
11 entry:
12 %div = udiv i64 %d, 4
13 br label %for.cond
14
15 for.cond: ; preds = %for.inc, %entry
16 %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
17 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
18 %cmp = icmp ne i32 %i.0, 64
19 br i1 %cmp, label %for.body, label %for.end
20
21 for.body: ; preds = %for.cond
22 store i32 0, i32* %p.addr.0, align 4
23 %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
24 store i32 1, i32* %add.ptr, align 4
25 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
26 br label %for.inc
27
28 for.inc: ; preds = %for.body
29 %inc = add i32 %i.0, 1
30 br label %for.cond
31
32 for.end: ; preds = %for.cond
33 ret void
34 }
35
36 ; CHECK: test1a
37 ; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2)
38 ; CHECK: --> {%p,+,(2 * (%d /u 2) * sizeof(i32))}
39 define void @test1a(i64 %d, i32* %p) nounwind uwtable ssp {
40 entry:
41 %div = udiv i64 %d, 2
42 br label %for.cond
43
44 for.cond: ; preds = %for.inc, %entry
45 %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
46 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
47 %cmp = icmp ne i32 %i.0, 64
48 br i1 %cmp, label %for.body, label %for.end
49
50 for.body: ; preds = %for.cond
51 store i32 0, i32* %p.addr.0, align 4
52 %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
53 store i32 1, i32* %add.ptr, align 4
54 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
55 br label %for.inc
56
57 for.inc: ; preds = %for.body
58 %inc = add i32 %i.0, 1
59 br label %for.cond
60
61 for.end: ; preds = %for.cond
62 ret void
63 }