llvm / 0d5fcae — Jakob Stoklund Olesen (7 years ago)

Defer some shl transforms to DAGCombine.

The shl instruction is used to represent multiplication by a constant power
of two as well as bitwise left shifts. Some InstCombine transformations
would turn an shl instruction into a bit mask operation, making it difficult
for later analysis passes to recognize the constant multiplication. Disable
those shl transformations, deferring them to DAGCombine time. An
'shl X, C' instruction is now treated mostly the same way as 'mul X, C'.

These transformations are deferred:

  (X >>? C) << C   --> X & (-1 << C)               (when X >> C has multiple uses)
  (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)   (when C2 > C1)
  (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)  (when C1 > C2)

The corresponding exact transformations are preserved, just like
div-exact + mul:

  (X >>?,exact C) << C   --> X
  (X >>?,exact C1) << C2 --> X << (C2-C1)
  (X >>?,exact C1) << C2 --> X >>?,exact (C1-C2)

The disabled transformations could also prevent the instruction selector
from recognizing rotate patterns in hash functions and cryptographic
primitives. I have a test case for that, but it is too fragile.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155136 91177308-0d34-0410-b5e6-96231b3b80d8
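To illustrate the intent, here is a minimal IR sketch (not one of the commit's
tests; the function name is made up). Because %d has a second use, the shift
pair cannot simply be folded away through demanded bits, so after this patch
the shl survives and scalar evolution still sees %m as a multiple of %d:

define i64 @keep_mul(i64 %x, i64* %p) {
  %d = lshr i64 %x, 3           ; %d = %x /u 8
  store i64 %d, i64* %p         ; second use keeps the lshr alive
  %m = shl i64 %d, 3            ; %m = 8 * (%x /u 8); formerly rewritten
                                ; to "and i64 %x, -8", hiding the multiply
  ret i64 %m
}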
7 changed files with 202 additions and 58 deletions.
528528 ShiftOp = 0;
529529
530530 if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
531
532 // This is a constant shift of a constant shift. Be careful about hiding
533 // shl instructions behind bit masks. They are used to represent multiplies
534 // by a constant, and it is important that simple arithmetic expressions
535 // are still recognizable by scalar evolution.
536 //
537 // The transforms applied to shl are very similar to the transforms applied
538 // to mul by constant. We can be more aggressive about optimizing right
539 // shifts.
540 //
541 // Combinations of right and left shifts will still be optimized in
542 // DAGCombine where scalar evolution no longer applies.
543
531544 ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
532545 uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
533546 uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
553566 }
554567
555568 if (ShiftAmt1 == ShiftAmt2) {
556 // If we have ((X >>? C) << C), turn this into X & (-1 << C).
557 if (I.getOpcode() == Instruction::Shl &&
558 ShiftOp->getOpcode() != Instruction::Shl) {
559 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
560 return BinaryOperator::CreateAnd(X,
561 ConstantInt::get(I.getContext(),Mask));
562 }
563569 // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
564570 if (I.getOpcode() == Instruction::LShr &&
565571 ShiftOp->getOpcode() == Instruction::Shl) {
569575 }
570576 } else if (ShiftAmt1 < ShiftAmt2) {
571577 uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
572
573 // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
578
579 // (X >>?,exact C1) << C2 --> X << (C2-C1)
580 // The inexact version is deferred to DAGCombine so we don't hide shl
581 // behind a bit mask.
574582 if (I.getOpcode() == Instruction::Shl &&
575 ShiftOp->getOpcode() != Instruction::Shl) {
583 ShiftOp->getOpcode() != Instruction::Shl &&
584 ShiftOp->isExact()) {
576585 assert(ShiftOp->getOpcode() == Instruction::LShr ||
577586 ShiftOp->getOpcode() == Instruction::AShr);
578587 ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
579 if (ShiftOp->isExact()) {
580 // (X >>?,exact C1) << C2 --> X << (C2-C1)
581 BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
582 X, ShiftDiffCst);
583 NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
584 NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
585 return NewShl;
586 }
587 Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
588
589 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
590 return BinaryOperator::CreateAnd(Shift,
591 ConstantInt::get(I.getContext(),Mask));
592 }
593
588 BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
589 X, ShiftDiffCst);
590 NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
591 NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
592 return NewShl;
593 }
594
594595 // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
595596 if (I.getOpcode() == Instruction::LShr &&
596597 ShiftOp->getOpcode() == Instruction::Shl) {
626627 assert(ShiftAmt2 < ShiftAmt1);
627628 uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
628629
629 // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
630 // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
631 // The inexact version is deferred to DAGCombine so we don't hide shl
632 // behind a bit mask.
630633 if (I.getOpcode() == Instruction::Shl &&
631 ShiftOp->getOpcode() != Instruction::Shl) {
634 ShiftOp->getOpcode() != Instruction::Shl &&
635 ShiftOp->isExact()) {
632636 ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
633 if (ShiftOp->isExact()) {
634 // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
635 BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
636 X, ShiftDiffCst);
637 NewShr->setIsExact(true);
638 return NewShr;
639 }
640 Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(),
641 X, ShiftDiffCst);
642 APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
643 return BinaryOperator::CreateAnd(Shift,
644 ConstantInt::get(I.getContext(),Mask));
645 }
646
637 BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
638 X, ShiftDiffCst);
639 NewShr->setIsExact(true);
640 return NewShr;
641 }
642
647643 // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
648644 if (I.getOpcode() == Instruction::LShr &&
649645 ShiftOp->getOpcode() == Instruction::Shl) {
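For contrast, a small sketch of the exact-shift folds that remain in
InstCombine (function names are illustrative, not from the commit's tests).
With the exact flag, no nonzero bits are shifted out, so the folds lose no
information and never replace the shl with a bit mask:

define i32 @exact_same(i32 %x) {
  %a = lshr exact i32 %x, 5     ; exact: the 5 low bits of %x are zero
  %b = shl i32 %a, 5            ; (X >>u,exact C) << C --> X
  ret i32 %b
}

define i32 @exact_up(i32 %x) {
  %a = ashr exact i32 %x, 3
  %b = shl i32 %a, 7            ; (X >>s,exact C1) << C2 --> X << (C2-C1)
  ret i32 %b                    ; folds to "shl i32 %x, 4"
}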
44 define i32 @main(i32 %argc) nounwind ssp {
55 entry:
66 %tmp3151 = trunc i32 %argc to i8
7 ; CHECK: %tmp3162 = shl i8 %tmp3151, 5
8 ; CHECK: and i8 %tmp3162, 64
7 ; CHECK: %tmp3163 = shl i8 %tmp3162, 6
8 ; CHECK: and i8 %tmp3163, 64
99 ; CHECK-NOT: shl
1010 ; CHECK-NOT: shr
1111 %tmp3161 = or i8 %tmp3151, -17
3737 %tmp10 = lshr i8 %tmp8, 7
3838 %tmp11 = shl i8 %tmp10, 5
3939
40 ; CHECK: %0 = lshr i8 %tmp8, 2
41 ; CHECK: %tmp11 = and i8 %0, 32
40 ; CHECK: %tmp10 = lshr i8 %tmp8, 7
41 ; CHECK: %tmp11 = shl nuw nsw i8 %tmp10, 5
4242
4343 %tmp12 = xor i8 %tmp11, %tmp9
4444 ret i8 %tmp12
4646 }
4747
4848 ; CHECK: @test6
49 ; CHECK-NOT: sh
49 ; CHECK: mul i55 %A, 6
5050 define i55 @test6(i55 %A) {
5151 %B = shl i55 %A, 1 ; [#uses=1]
5252 %C = mul i55 %B, 3 ; [#uses=1]
5353 ret i55 %C
5454 }
5555
56 ; CHECK: @test6a
57 ; CHECK: mul i55 %A, 6
58 define i55 @test6a(i55 %A) {
59 %B = mul i55 %A, 3 ; [#uses=1]
60 %C = shl i55 %B, 1 ; [#uses=1]
61 ret i55 %C
62 }
63
5664 ; CHECK: @test7
5765 ; CHECK-NOT: sh
5866 define i29 @test7(i8 %X) {
8694 }
8795
8896 ; CHECK: @test11
89 ; CHECK-NOT: sh
97 ; Don't hide the shl from scalar evolution. DAGCombine will get it.
98 ; CHECK: shl
9099 define i23 @test11(i23 %A) {
91100 %a = mul i23 %A, 3 ; [#uses=1]
92101 %B = lshr i23 %a, 11 ; [#uses=1]
103112 }
104113
105114 ; CHECK: @test13
106 ; CHECK-NOT: sh
115 ; Don't hide the shl from scalar evolution. DAGCombine will get it.
116 ; CHECK: shl
107117 define i18 @test13(i18 %A) {
108118 %a = mul i18 %A, 3 ; [#uses=1]
109119 %B = ashr i18 %a, 8 ; [#uses=1]
456456 %E = sext i32 %D to i64
457457 ret i64 %E
458458 ; CHECK: @test50
459 ; CHECK-NEXT: shl i64 %A, 30
459 ; lshr+shl will be handled by DAGCombine.
460 ; CHECK-NEXT: lshr i64 %A, 2
461 ; CHECK-NEXT: shl i64 %a, 32
460462 ; CHECK-NEXT: add i64 {{.*}}, -4294967296
461 ; CHECK-NEXT: %sext = ashr i64 {{.*}}, 32
462 ; CHECK-NEXT: ret i64 %sext
463 ; CHECK-NEXT: %E = ashr exact i64 {{.*}}, 32
464 ; CHECK-NEXT: ret i64 %E
463465 }
464466
465467 define i64 @test51(i64 %A, i1 %cond) {
6464 ; CHECK: @test6
6565 ; CHECK-NEXT: mul i32 %A, 6
6666 ; CHECK-NEXT: ret i32
67 %B = shl i32 %A, 1 ;; convert to a mul instruction
68 %C = mul i32 %B, 3
67 %B = shl i32 %A, 1 ;; convert to a mul instruction
68 %C = mul i32 %B, 3
69 ret i32 %C
70 }
71
72 define i32 @test6a(i32 %A) {
73 ; CHECK: @test6a
74 ; CHECK-NEXT: mul i32 %A, 6
75 ; CHECK-NEXT: ret i32
76 %B = mul i32 %A, 3
77 %C = shl i32 %B, 1 ;; convert to a mul instruction
6978 ret i32 %C
7079 }
7180
96105 ret i8 %C
97106 }
98107
108 ;; This transformation is deferred to DAGCombine:
99109 ;; (A >> 7) << 7 === A & 128
110 ;; The shl may be valuable to scalar evolution.
100111 define i8 @test10(i8 %A) {
101112 ; CHECK: @test10
102113 ; CHECK-NEXT: and i8 %A, -128
106117 ret i8 %C
107118 }
108119
120 ;; Allow the simplification when the lshr shift is exact.
121 define i8 @test10a(i8 %A) {
122 ; CHECK: @test10a
123 ; CHECK-NEXT: ret i8 %A
124 %B = lshr exact i8 %A, 7
125 %C = shl i8 %B, 7
126 ret i8 %C
127 }
128
129 ;; This transformation is deferred to DAGCombine:
109130 ;; (A >> 3) << 4 === (A & 0xF8) << 1
131 ;; The shl may be valuable to scalar evolution.
110132 define i8 @test11(i8 %A) {
111133 ; CHECK: @test11
112 ; CHECK-NEXT: mul i8 %A, 6
113 ; CHECK-NEXT: and i8
134 ; CHECK: shl i8
114135 ; CHECK-NEXT: ret i8
115136 %a = mul i8 %A, 3 ; [#uses=1]
116137 %B = lshr i8 %a, 3 ; [#uses=1]
118139 ret i8 %C
119140 }
120141
142 ;; Allow the simplification in InstCombine when the lshr shift is exact.
143 define i8 @test11a(i8 %A) {
144 ; CHECK: @test11a
145 ; CHECK-NEXT: mul i8 %A, 6
146 ; CHECK-NEXT: ret i8
147 %a = mul i8 %A, 3
148 %B = lshr exact i8 %a, 3
149 %C = shl i8 %B, 4
150 ret i8 %C
151 }
152
153 ;; This is deferred to DAGCombine unless %B is single-use.
121154 ;; (A >> 8) << 8 === A & -256
122155 define i32 @test12(i32 %A) {
123156 ; CHECK: @test12
128161 ret i32 %C
129162 }
130163
164 ;; This transformation is deferred to DAGCombine:
131165 ;; (A >> 3) << 4 === (A & -8) * 2
166 ;; The shl may be valuable to scalar evolution.
132167 define i8 @test13(i8 %A) {
133168 ; CHECK: @test13
134 ; CHECK-NEXT: mul i8 %A, 6
135 ; CHECK-NEXT: and i8
169 ; CHECK: shl i8
136170 ; CHECK-NEXT: ret i8
137171 %a = mul i8 %A, 3 ; [#uses=1]
138172 %B = ashr i8 %a, 3 ; [#uses=1]
139173 %C = shl i8 %B, 4 ; [#uses=1]
174 ret i8 %C
175 }
176
177 define i8 @test13a(i8 %A) {
178 ; CHECK: @test13a
179 ; CHECK-NEXT: mul i8 %A, 6
180 ; CHECK-NEXT: ret i8
181 %a = mul i8 %A, 3
182 %B = ashr exact i8 %a, 3
183 %C = shl i8 %B, 4
140184 ret i8 %C
141185 }
142186
476520 %tmp49 = lshr i8 %tmp48, 5
477521 %tmp50 = mul i8 %tmp49, 64
478522 %tmp51 = xor i8 %tmp50, %tmp5
479 ; CHECK: and i8 %0, 16
480523 %tmp52 = and i8 %tmp51, -128
481524 %tmp53 = lshr i8 %tmp52, 7
525 ; CHECK: lshr i8 %tmp51, 7
482526 %tmp54 = mul i8 %tmp53, 16
527 ; CHECK: shl nuw nsw i8 %tmp53, 4
483528 %tmp55 = xor i8 %tmp54, %tmp51
484529 ; CHECK: ret i8 %tmp551
485530 ret i8 %tmp55
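The rotate concern from the commit message can also be sketched
(hypothetically; the author's own test case was too fragile to commit).
Here %x has two users, and the old "(X >>u C1) << C2" fold would have
rewritten %hi into a masked shift of %in, so the selector could no longer
match %hi | %lo as a rotate of %x:

define i32 @rotl5(i32 %in) {
  %x  = lshr i32 %in, 3
  %hi = shl i32 %x, 5           ; previously became "(shl %in, 2) & -32"
  %lo = lshr i32 %x, 27
  %r  = or i32 %hi, %lo         ; rotl(%x, 5), matchable at ISel time
  ret i32 %r
}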
2121 ; CHECK: @test1
2222 ; CHECK-NEXT: ret void
2323 }
24
25 ; This function exposes a phase ordering problem when InstCombine is
26 ; turning %add into a bitmask, making it difficult to spot a 0 return value.
27 ;
28 ; It is also important that %add is expressed as a multiple of %div so scalar
29 ; evolution can recognize it.
30 define i32 @test2(i32 %a, i32* %p) nounwind uwtable ssp {
31 entry:
32 %div = udiv i32 %a, 4
33 %arrayidx = getelementptr inbounds i32* %p, i64 0
34 store i32 %div, i32* %arrayidx, align 4
35 %add = add i32 %div, %div
36 %arrayidx1 = getelementptr inbounds i32* %p, i64 1
37 store i32 %add, i32* %arrayidx1, align 4
38 %arrayidx2 = getelementptr inbounds i32* %p, i64 1
39 %0 = load i32* %arrayidx2, align 4
40 %arrayidx3 = getelementptr inbounds i32* %p, i64 0
41 %1 = load i32* %arrayidx3, align 4
42 %mul = mul i32 2, %1
43 %sub = sub i32 %0, %mul
44 ret i32 %sub
45
46 ; CHECK: @test2
47 ; CHECK: %div = lshr i32 %a, 2
48 ; CHECK: %add = shl nuw nsw i32 %div, 1
49 ; CHECK: ret i32 0
50 }
0 ; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
1 ;
2 ; This file contains phase ordering tests for scalar evolution.
3 ; Test that the standard passes don't obfuscate the IR to the point where
4 ; scalar evolution can no longer recognize expressions.
5
6 ; CHECK: test1
7 ; The loop body contains two increments by %div.
8 ; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d.
9 ; CHECK: --> {%p,+,(2 * (%d /u 4) * sizeof(i32))}
10 define void @test1(i64 %d, i32* %p) nounwind uwtable ssp {
11 entry:
12 %div = udiv i64 %d, 4
13 br label %for.cond
14
15 for.cond: ; preds = %for.inc, %entry
16 %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
17 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
18 %cmp = icmp ne i32 %i.0, 64
19 br i1 %cmp, label %for.body, label %for.end
20
21 for.body: ; preds = %for.cond
22 store i32 0, i32* %p.addr.0, align 4
23 %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
24 store i32 1, i32* %add.ptr, align 4
25 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
26 br label %for.inc
27
28 for.inc: ; preds = %for.body
29 %inc = add i32 %i.0, 1
30 br label %for.cond
31
32 for.end: ; preds = %for.cond
33 ret void
34 }
35
36 ; CHECK: test1a
37 ; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2)
38 ; CHECK: --> {%p,+,(2 * (%d /u 2) * sizeof(i32))}
39 define void @test1a(i64 %d, i32* %p) nounwind uwtable ssp {
40 entry:
41 %div = udiv i64 %d, 2
42 br label %for.cond
43
44 for.cond: ; preds = %for.inc, %entry
45 %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
46 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
47 %cmp = icmp ne i32 %i.0, 64
48 br i1 %cmp, label %for.body, label %for.end
49
50 for.body: ; preds = %for.cond
51 store i32 0, i32* %p.addr.0, align 4
52 %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
53 store i32 1, i32* %add.ptr, align 4
54 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
55 br label %for.inc
56
57 for.inc: ; preds = %for.body
58 %inc = add i32 %i.0, 1
59 br label %for.cond
60
61 for.end: ; preds = %for.cond
62 ret void
63 }