llvm.org GIT mirror: llvm @ 5d59315
[LSR] canonicalize Prod*(1<<C) to Prod<<C

Summary:
Because LSR happens at a late stage where mul of a power of 2 is
typically canonicalized to shl, this canonicalization emits code that
can be better CSE'ed.

Test Plan:
Transforms/LoopStrengthReduce/shl.ll shows how this change makes GVN
more powerful. Fixes some existing tests due to this change.

Reviewers: sanjoy, majnemer, atrick

Reviewed By: majnemer, atrick

Subscribers: majnemer, llvm-commits

Differential Revision: http://reviews.llvm.org/D10448

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240573 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Jingyue Wu
6 changed files with 56 additions and 11 deletions.
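The change rests on the identity x * 2^c == x << c for integer x, plus a cheap power-of-2 test; in the patch these come from PatternMatch's m_Power2 and APInt::logBase2. Below is a minimal standalone C++ analogue of those two pieces (plain uint64_t instead of APInt; the helper names isPowerOf2/logBase2 are illustrative, not LLVM's API), showing the test and the shift-amount computation:

#include <cassert>
#include <cstdint>

// Power-of-2 test: exactly one bit set. This mirrors what m_Power2
// checks on a constant operand.
static bool isPowerOf2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

// Index of the single set bit of a power of 2: the plain-integer
// analogue of APInt::logBase2.
static unsigned logBase2(uint64_t V) {
  unsigned Log = 0;
  while (V >>= 1)
    ++Log;
  return Log;
}

int main() {
  uint64_t X = 12345, C = 8; // C == 1 << 3
  assert(isPowerOf2(C));
  // The identity the canonicalization exploits: X * (1 << c) == X << c.
  assert(X * C == (X << logBase2(C)));
  return 0;
}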
lib/Analysis/ScalarEvolutionExpander.cpp:

 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"

 using namespace llvm;
+using namespace PatternMatch;

 /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
 /// reusing an existing cast if a suitable one exists, moving an existing

   // out of loops.
   Value *Prod = nullptr;
   for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
-       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) {
     const SCEV *Op = I->second;
     if (!Prod) {
       // This is the first operand. Just expand it.
       Prod = expand(Op);
-      ++I;
     } else if (Op->isAllOnesValue()) {
       // Instead of doing a multiply by negative one, just do a negate.
       Prod = InsertNoopCastOfTo(Prod, Ty);
       Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
-      ++I;
     } else {
       // A simple mul.
       Value *W = expandCodeFor(Op, Ty);
       Prod = InsertNoopCastOfTo(Prod, Ty);
       // Canonicalize a constant to the RHS.
       if (isa<Constant>(Prod)) std::swap(Prod, W);
-      Prod = InsertBinop(Instruction::Mul, Prod, W);
-      ++I;
+      const APInt *RHS;
+      if (match(W, m_Power2(RHS))) {
+        // Canonicalize Prod*(1<<C) to Prod<<C.
+        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
+        Prod = InsertBinop(Instruction::Shl, Prod,
+                           ConstantInt::get(Ty, RHS->logBase2()));
+      } else {
+        Prod = InsertBinop(Instruction::Mul, Prod, W);
+      }
     }
   }
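Why this helps CSE: by the time LSR runs, instcombine has long since rewritten user-visible muls by powers of 2 into shls (e.g. %mul = shl nsw i64 %s, 2 in the new test below). If the expander then materializes the same quantity as mul i64 %s, 4, GVN sees two structurally different instructions and keeps both; emitting shl i64 %s, 2 makes them identical. A minimal standalone sketch of this canonicalize-then-deduplicate idea (toy expression tuples and an illustrative canonicalize(), not LLVM's GVN):

#include <cassert>
#include <cstdint>
#include <set>
#include <string>
#include <tuple>

// A toy three-address expression: (opcode, operand, constant).
using Expr = std::tuple<std::string, std::string, uint64_t>;

// Rewrite mul-by-power-of-2 as shl, as the patch does for LSR's output.
static Expr canonicalize(Expr E) {
  auto [Op, LHS, C] = E;
  if (Op == "mul" && C != 0 && (C & (C - 1)) == 0) {
    unsigned Log = 0;
    for (uint64_t V = C; V >>= 1;)
      ++Log;
    return {"shl", LHS, Log};
  }
  return E;
}

int main() {
  Expr UserCode{"shl", "%s", 2};  // what instcombine leaves behind
  Expr LSROutput{"mul", "%s", 4}; // what the expander emitted pre-patch
  assert(!(UserCode == LSROutput)); // structurally different: naive CSE keeps both
  std::set<Expr> Unique{canonicalize(UserCode), canonicalize(LSROutput)};
  assert(Unique.size() == 1); // after canonicalization they unify
  return 0;
}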

Updated tests:

   ret void
 ; CHECK-LABEL: @test2(
 ; CHECK: br i1 %cmp10,
-; CHECK: %0 = mul i64 %Size, 4
+; CHECK: %0 = shl i64 %Size, 2
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
 ; CHECK-NOT: store
 }

 ; CHECK: %0 = add i32 %n, -1
 ; CHECK: %1 = sub i32 %0, %m
 ; CHECK: %2 = lshr i32 %1, 2
-; CHECK: %3 = mul i32 %2, 4
+; CHECK: %3 = shl i32 %2, 2
 ; CHECK: %4 = add i32 %m, %3
 ; CHECK: %5 = add i32 %4, 3
 ; CHECK: br label %for.body

 ; CHECK: %0 = add i32 %n, -1
 ; CHECK: %1 = sub i32 %0, %rem
 ; CHECK: %2 = lshr i32 %1, 2
-; CHECK: %3 = mul i32 %2, 4
+; CHECK: %3 = shl i32 %2, 2
 ; CHECK: %4 = add i32 %rem, %3
 ; CHECK: %5 = add i32 %4, 3
 ; CHECK: br label %for.body

 ; CHECK-LABEL: @test(
 ; multiplies are hoisted out of the loop
 ; CHECK: while.body.lr.ph:
-; CHECK: mul i64
-; CHECK: mul i64
+; CHECK: shl i64
+; CHECK: shl i64
 ; GEPs are ugly
 ; CHECK: while.body:
 ; CHECK: phi

 ; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
 ; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
-; CHECK: [[r3:%[a-z0-9]+]] = mul i64 [[r2]], 2
+; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1
 ; CHECK: br label %for.body
 ; CHECK: for.body:
 ; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ]

Transforms/LoopStrengthReduce/shl.ll (new file):

; RUN: opt < %s -loop-reduce -gvn -S | FileCheck %s

target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"

define void @_Z3fooPfll(float* nocapture readonly %input, i64 %n, i64 %s) {
; CHECK-LABEL: @_Z3fooPfll(
entry:
  %mul = shl nsw i64 %s, 2
; CHECK: %mul = shl i64 %s, 2
  tail call void @_Z3bazl(i64 %mul) #2
; CHECK-NEXT: call void @_Z3bazl(i64 %mul)
  %cmp.5 = icmp sgt i64 %n, 0
  br i1 %cmp.5, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %i.06 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, float* %input, i64 %i.06
; LoopStrengthReduce should reuse %mul as the stride.
; CHECK: getelementptr i1, i1* {{[^,]+}}, i64 %mul
  %0 = load float, float* %arrayidx, align 4
  tail call void @_Z3barf(float %0) #2
  %add = add nsw i64 %i.06, %s
  %cmp = icmp slt i64 %add, %n
  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
}

declare void @_Z3bazl(i64)

declare void @_Z3barf(float)