llvm.org GIT mirror llvm / 164cd01
[INDVARS] Extend widening of induction variables to "sub nsw" and "mul nsw" instructions. Currently only "add nsw" is widened. This patch eliminates tons of "sext" instructions for 64-bit code (and the corresponding target code) in cases like: int N = 100; float **A; void foo(int x0, int x1) { float * A_cur = &A[0][0]; float * A_next = &A[1][0]; for(int x = x0; x < x1; ++x) { // Currently only the [x+N] case is widened. The other 2 cases lead to sext. // This patch fixes it, so all 3 cases do not need sext. const float div = A_cur[x + N] + A_cur[x - N] + A_cur[x * N]; A_next[x] = div; } } ... > clang++ test.cpp -march=core-avx2 -Ofast -fno-unroll-loops -fno-tree-vectorize -S -o - Differential Revision: http://reviews.llvm.org/D4695 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216160 91177308-0d34-0410-b5e6-96231b3b80d8 Zinovy Nis 5 years ago
2 changed file(s) with 36 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
756756
757757 const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
758758
759 const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
760 unsigned OpCode) const;
761
759762 Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
760763
761764 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
832835 }
833836 }
834837
838 const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
839 unsigned OpCode) const {
840 if (OpCode == Instruction::Add)
841 return SE->getAddExpr(LHS, RHS);
842 if (OpCode == Instruction::Sub)
843 return SE->getMinusSCEV(LHS, RHS);
844 if (OpCode == Instruction::Mul)
845 return SE->getMulExpr(LHS, RHS);
846
847 llvm_unreachable("Unsupported opcode.");
848 return nullptr;
849 }
850
835851 /// No-wrap operations can transfer sign extension of their result to their
836852 /// operands. Generate the SCEV value for the widened operation without
837853 /// actually modifying the IR yet. If the expression after extending the
838854 /// operands is an AddRec for this loop, return it.
839855 const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
856
840857 // Handle the common case of add
841 if (DU.NarrowUse->getOpcode() != Instruction::Add)
858 const unsigned OpCode = DU.NarrowUse->getOpcode();
859 // Only Add/Sub/Mul instructions supported yet.
860 if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
861 OpCode != Instruction::Mul)
842862 return nullptr;
843863
844864 // One operand (NarrowDef) has already been extended to WideDef. Now determine
858878 else
859879 return nullptr;
860880
861 // When creating this AddExpr, don't apply the current operations NSW or NUW
881 // When creating this SCEV expr, don't apply the current operations NSW or NUW
862882 // flags. This instruction may be guarded by control flow that the no-wrap
863883 // behavior depends on. Non-control-equivalent instructions can be mapped to
864884 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
865885 // semantics to those operations.
866886 const SCEVAddRecExpr *AddRec = dyn_cast(
867 SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
868
887 GetSCEVByOpCode(SE->getSCEV(DU.WideDef), ExtendOperExpr, OpCode));
869888 if (!AddRec || AddRec->getLoop() != L)
870889 return nullptr;
871890 return AddRec;
0 ; RUN: opt < %s -indvars -S | FileCheck %s
11 ; Test WidenIV::GetExtendedOperandRecurrence.
2 ; add219 should be extended to i64 because it is nsw, even though its
2 ; %add, %sub and %mul should be extended to i64 because they are nsw, even though their
33 ; sext cannot be hoisted outside the loop.
44
55 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
1717 br i1 undef, label %for.body170, label %for.body153
1818
1919 ; CHECK: add nsw i64 %indvars.iv, 1
20 ; CHECK: sub nsw i64 %indvars.iv, 2
21 ; CHECK: mul nsw i64 %indvars.iv, 4
2022 for.body170: ; preds = %for.body170, %for.body153
2123 %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
22 %add219 = add nsw i32 %i2.19, 1
23 %idxprom220 = sext i32 %add219 to i64
24
25 %add = add nsw i32 %i2.19, 1
26 %add.idxprom = sext i32 %add to i64
27
28 %sub = sub nsw i32 %i2.19, 2
29 %sub.idxprom = sext i32 %sub to i64
30
31 %mul = mul nsw i32 %i2.19, 4
32 %mul.idxprom = sext i32 %mul to i64
33
2434 %add249 = add nsw i32 %i2.19, %shl132
2535 br label %for.body170
26
2736 for.end285: ; preds = %entry
2837 ret void
2938 }