llvm.org GIT mirror llvm / 83c8e73
Disable indvar widening if arithmetic on the wider type is more expensive. Summary: Reapply r221772. The old patch broke the bot because the @indvar_32_bit test was run whether NVPTX was enabled or not. IndVarSimplify should not widen an indvar if arithmetic on the wider indvar is more expensive than on the narrower indvar. For instance, although NVPTX64 treats i64 as a legal type, an ADD on i64 is twice as expensive as one on i32, because the hardware needs to simulate a 64-bit integer using two 32-bit integers. Split from D6188, and based on D6195, which adds NVPTXTargetTransformInfo. Fixes PR21148. Test Plan: Added @indvar_32_bit, which verifies that we do not widen an indvar if arithmetic on the wider type is more expensive. This test is run only when NVPTX is enabled. Reviewers: jholewinski, eliben, meheff, atrick Reviewed By: atrick Subscribers: jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D6196 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221799 91177308-0d34-0410-b5e6-96231b3b80d8 Jingyue Wu 4 years ago
4 changed file(s) with 108 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
3535 namespace {
3636
3737 class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo {
38 const NVPTXTargetLowering *TLI;
3839 public:
39 NVPTXTTI() : ImmutablePass(ID) {
40 NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) {
4041 llvm_unreachable("This pass cannot be directly constructed");
4142 }
4243
43 NVPTXTTI(const NVPTXTargetMachine *TM) : ImmutablePass(ID) {
44 NVPTXTTI(const NVPTXTargetMachine *TM)
45 : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) {
4446 initializeNVPTXTTIPass(*PassRegistry::getPassRegistry());
4547 }
4648
6264
6365 bool hasBranchDivergence() const override;
6466
67 unsigned getArithmeticInstrCost(
68 unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
69 OperandValueKind Opd2Info = OK_AnyValue,
70 OperandValueProperties Opd1PropInfo = OP_None,
71 OperandValueProperties Opd2PropInfo = OP_None) const override;
72
6573 /// @}
6674 };
6775
7785 }
7886
7987 bool NVPTXTTI::hasBranchDivergence() const { return true; }
88
89 unsigned NVPTXTTI::getArithmeticInstrCost(
90 unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
91 OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
92 OperandValueProperties Opd2PropInfo) const {
93 // Legalize the type.
94 std::pair LT = TLI->getTypeLegalizationCost(Ty);
95
96 int ISD = TLI->InstructionOpcodeToISD(Opcode);
97
98 switch (ISD) {
99 default:
100 return TargetTransformInfo::getArithmeticInstrCost(
101 Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
102 case ISD::ADD:
103 case ISD::MUL:
104 case ISD::XOR:
105 case ISD::OR:
106 case ISD::AND:
107 // The machine code (SASS) simulates an i64 with two i32. Therefore, we
108 // estimate that arithmetic operations on i64 are twice as expensive as
109 // those on types that can fit into one machine register.
110 if (LT.second.SimpleTy == MVT::i64)
111 return 2 * LT.first;
112 // Delegate other cases to the basic TTI.
113 return TargetTransformInfo::getArithmeticInstrCost(
114 Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
115 }
116 }
3030 #include "llvm/Analysis/LoopInfo.h"
3131 #include "llvm/Analysis/LoopPass.h"
3232 #include "llvm/Analysis/ScalarEvolutionExpander.h"
33 #include "llvm/Analysis/TargetTransformInfo.h"
3334 #include "llvm/IR/BasicBlock.h"
3435 #include "llvm/IR/CFG.h"
3536 #include "llvm/IR/Constants.h"
6869
6970 namespace {
7071 class IndVarSimplify : public LoopPass {
71 LoopInfo *LI;
72 ScalarEvolution *SE;
73 DominatorTree *DT;
74 const DataLayout *DL;
75 TargetLibraryInfo *TLI;
72 LoopInfo *LI;
73 ScalarEvolution *SE;
74 DominatorTree *DT;
75 const DataLayout *DL;
76 TargetLibraryInfo *TLI;
77 const TargetTransformInfo *TTI;
7678
7779 SmallVector DeadInsts;
7880 bool Changed;
660662 /// extended by this sign or zero extend operation. This is used to determine
661663 /// the final width of the IV before actually widening it.
662664 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
663 const DataLayout *DL) {
665 const DataLayout *DL, const TargetTransformInfo *TTI) {
664666 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
665667 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
666668 return;
669671 uint64_t Width = SE->getTypeSizeInBits(Ty);
670672 if (DL && !DL->isLegalInteger(Width))
671673 return;
674
675 // Cast is either an sext or zext up to this point.
676 // We should not widen an indvar if arithmetics on the wider indvar are more
677 // expensive than those on the narrower indvar. We check only the cost of ADD
678 // because at least an ADD is required to increment the induction variable. We
679 // could compute more comprehensively the cost of all instructions on the
680 // induction variable when necessary.
681 if (TTI &&
682 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
683 TTI->getArithmeticInstrCost(Instruction::Add,
684 Cast->getOperand(0)->getType())) {
685 return;
686 }
672687
673688 if (!WI.WidestNativeType) {
674689 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
11861201 class IndVarSimplifyVisitor : public IVVisitor {
11871202 ScalarEvolution *SE;
11881203 const DataLayout *DL;
1204 const TargetTransformInfo *TTI;
11891205 PHINode *IVPhi;
11901206
11911207 public:
11921208 WideIVInfo WI;
11931209
11941210 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1195 const DataLayout *DL, const DominatorTree *DTree):
1196 SE(SCEV), DL(DL), IVPhi(IV) {
1211 const DataLayout *DL, const TargetTransformInfo *TTI,
1212 const DominatorTree *DTree)
1213 : SE(SCEV), DL(DL), TTI(TTI), IVPhi(IV) {
11971214 DT = DTree;
11981215 WI.NarrowIV = IVPhi;
11991216 if (ReduceLiveIVs)
12011218 }
12021219
12031220 // Implement the interface used by simplifyUsersOfIV.
1204 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, DL); }
1221 void visitCast(CastInst *Cast) override {
1222 visitIVCast(Cast, WI, SE, DL, TTI);
1223 }
12051224 };
12061225 }
12071226
12351254 PHINode *CurrIV = LoopPhis.pop_back_val();
12361255
12371256 // Information about sign/zero extensions of CurrIV.
1238 IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, DT);
1257 IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, TTI, DT);
12391258
12401259 Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
12411260
18941913 DataLayoutPass *DLP = getAnalysisIfAvailable();
18951914 DL = DLP ? &DLP->getDataLayout() : nullptr;
18961915 TLI = getAnalysisIfAvailable();
1916 TTI = getAnalysisIfAvailable();
18971917
18981918 DeadInsts.clear();
18991919 Changed = false;
# Mark this configuration as unsupported when 'NVPTX' is not among the
# targets listed in config.root.targets.
if 'NVPTX' not in config.root.targets:
    config.unsupported = True
0 ; RUN: opt < %s -indvars -S | FileCheck %s
1
2 target triple = "nvptx64-unknown-unknown"
3
4 ; For the nvptx64 architecture, an arithmetic instruction on a 64-bit integer
5 ; is twice as expensive as one on a 32-bit integer, because the hardware
6 ; needs to simulate a 64-bit integer using two 32-bit integers.
7 ; Therefore, on this particular architecture, we should not widen induction
8 ; variables to 64-bit integers even though i64 is a legal type in the 64-bit
9 ; PTX ISA.
10
11 define void @indvar_32_bit(i32 %n, i32* nocapture %output) { ; stores i*i to output[i] for i = 0, 3, 6, ... while i < n
12 ; CHECK-LABEL: @indvar_32_bit
13 entry:
14 %cmp5 = icmp sgt i32 %n, 0 ; guard: the loop is entered only when n > 0
15 br i1 %cmp5, label %for.body.preheader, label %for.end
16
17 for.body.preheader: ; preds = %entry
18 br label %for.body
19
20 for.body: ; preds = %for.body.preheader, %for.body
21 %i.06 = phi i32 [ 0, %for.body.preheader ], [ %add, %for.body ] ; 32-bit induction variable, starts at 0
22 ; CHECK: phi i32
23 %mul = mul nsw i32 %i.06, %i.06
24 %0 = sext i32 %i.06 to i64 ; sign-extension of the induction variable to i64
25 %arrayidx = getelementptr inbounds i32* %output, i64 %0 ; the extended value is used as the array index
26 store i32 %mul, i32* %arrayidx, align 4
27 %add = add nsw i32 %i.06, 3 ; induction variable step of 3
28 %cmp = icmp slt i32 %add, %n
29 br i1 %cmp, label %for.body, label %for.end.loopexit
30
31 for.end.loopexit: ; preds = %for.body
32 br label %for.end
33
34 for.end: ; preds = %for.end.loopexit, %entry
35 ret void
36 }