Wisely choose sext or zext when widening IV.

Summary: The patch fixes a regression caused by two earlier patches, D18777 and D18867.

Reviewers: reames, sanjoy

Differential Revision: http://reviews.llvm.org/D24280

From: Li Huang

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282650 91177308-0d34-0410-b5e6-96231b3b80d8

Committed by Evgeny Stupachenko
2 changed files with 362 additions and 41 deletions.
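For context (illustration only, not part of the commit): the patch replaces the single per-IV IsSigned flag with a per-instruction ExtendKind, so a widened IV can serve users that were originally sign-extended and users that were originally zero-extended, as in the hypothetical C-level sketch below, which corresponds roughly to the @foo regression test added further down.

// Hypothetical source roughly matching the @foo test below. The 32-bit index i
// is provably non-negative, so B[i] (sext index) and C[i + 2], A[i] (zext
// indices) can all be rewritten onto one widened i64 IV with no trunc/sext/zext
// left in the loop body.
void foo(int *A, int *B, int *C, int N) {
  for (int i = 0; i < N; i++)
    A[i] = B[i] + C[i + 2];
}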
879879 // Parameters
880880 PHINode *OrigPhi;
881881 Type *WideType;
882 bool IsSigned;
883882
884883 // Context
885884 LoopInfo *LI;
893892 const SCEV *WideIncExpr;
894893 SmallVectorImpl<WeakVH> &DeadInsts;
895894
896 SmallPtrSet<Instruction*,16> Widened;
895 SmallPtrSet<Instruction *,16> Widened;
897896 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
897
898 enum ExtendKind { ZeroExtended, SignExtended, Unknown };
899 // A map tracking the kind of extension used to widen each narrow IV
900 // and narrow IV user.
901 // Key: pointer to a narrow IV or IV user.
902 // Value: the kind of extension used to widen this Instruction.
903 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
898904
899905 public:
900906 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
902908 SmallVectorImpl<WeakVH> &DI) :
903909 OrigPhi(WI.NarrowIV),
904910 WideType(WI.WidestNativeType),
905 IsSigned(WI.IsSigned),
906911 LI(LInfo),
907912 L(LI->getLoopFor(OrigPhi->getParent())),
908913 SE(SEv),
912917 WideIncExpr(nullptr),
913918 DeadInsts(DI) {
914919 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
920 ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
915921 }
916922
917923 PHINode *createWideIV(SCEVExpander &Rewriter);
925931 const SCEVAddRecExpr *WideAR);
926932 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
927933
928 const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);
929
930 const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);
934 ExtendKind getExtendKind(Instruction *I);
935
936 typedef std::pair<const SCEVAddRecExpr *, ExtendKind> WidenedRecTy;
937
938 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
939
940 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
931941
932942 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
933943 unsigned OpCode) const;
10011011 // about the narrow operand yet so must insert a [sz]ext. It is probably loop
10021012 // invariant and will be folded or hoisted. If it actually comes from a
10031013 // widened IV, it should be removed during a future call to widenIVUse.
1014 bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
10041015 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
10051016 ? WideDef
10061017 : createExtendInst(NarrowUse->getOperand(0), WideType,
10851096 return WideUse == WideAR;
10861097 };
10871098
1088 bool SignExtend = IsSigned;
1099 bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
10891100 if (!GuessNonIVOperand(SignExtend)) {
10901101 SignExtend = !SignExtend;
10911102 if (!GuessNonIVOperand(SignExtend))
11111122 return WideBO;
11121123 }
11131124
1125 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1126 auto It = ExtendKindMap.find(I);
1127 assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1128 return It->second;
1129 }
1130
11141131 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
11151132 unsigned OpCode) const {
11161133 if (OpCode == Instruction::Add)
11261143 /// No-wrap operations can transfer sign extension of their result to their
11271144 /// operands. Generate the SCEV value for the widened operation without
11281145 /// actually modifying the IR yet. If the expression after extending the
1129 /// operands is an AddRec for this loop, return it.
1130 const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1146 /// operands is an AddRec for this loop, return the AddRec and the kind of
1147 /// extension used.
1148 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
11311149
11321150 // Handle the common case of add
11331151 const unsigned OpCode = DU.NarrowUse->getOpcode();
11341152 // Only Add/Sub/Mul instructions supported yet.
11351153 if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
11361154 OpCode != Instruction::Mul)
1137 return nullptr;
1155 return {nullptr, Unknown};
11381156
11391157 // One operand (NarrowDef) has already been extended to WideDef. Now determine
11401158 // if extending the other will lead to a recurrence.
11451163 const SCEV *ExtendOperExpr = nullptr;
11461164 const OverflowingBinaryOperator *OBO =
11471165 cast<OverflowingBinaryOperator>(DU.NarrowUse);
1148 if (IsSigned && OBO->hasNoSignedWrap())
1166 ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1167 if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
11491168 ExtendOperExpr = SE->getSignExtendExpr(
11501169 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1151 else if(!IsSigned && OBO->hasNoUnsignedWrap())
1170 else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
11521171 ExtendOperExpr = SE->getZeroExtendExpr(
11531172 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
11541173 else
1155 return nullptr;
1174 return {nullptr, Unknown};
11561175
11571176 // When creating this SCEV expr, don't apply the current operations NSW or NUW
11581177 // flags. This instruction may be guarded by control flow that the no-wrap
11701189 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
11711190
11721191 if (!AddRec || AddRec->getLoop() != L)
1173 return nullptr;
1174 return AddRec;
1192 return {nullptr, Unknown};
1193
1194 return {AddRec, ExtKind};
11751195 }
11761196
11771197 /// Is this instruction potentially interesting for further simplification after
11781198 /// widening its type? In other words, can the extend be safely hoisted out of
11791199 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1180 /// so, return the sign or zero extended recurrence. Otherwise return NULL.
1181 const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) {
1182 if (!SE->isSCEVable(NarrowUse->getType()))
1183 return nullptr;
1184
1185 const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
1200 /// so, return the extended recurrence and the kind of extension used. Otherwise
1201 /// return {nullptr, Unknown}.
1202 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1203 if (!SE->isSCEVable(DU.NarrowUse->getType()))
1204 return {nullptr, Unknown};
1205
1206 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
11861207 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
11871208 SE->getTypeSizeInBits(WideType)) {
11881209 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
11891210 // index. So don't follow this use.
1190 return nullptr;
1191 }
1192
1193 const SCEV *WideExpr = IsSigned ?
1194 SE->getSignExtendExpr(NarrowExpr, WideType) :
1195 SE->getZeroExtendExpr(NarrowExpr, WideType);
1211 return {nullptr, Unknown};
1212 }
1213
1214 const SCEV *WideExpr;
1215 ExtendKind ExtKind;
1216 if (DU.NeverNegative) {
1217 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1218 if (isa<SCEVAddRecExpr>(WideExpr))
1219 ExtKind = SignExtended;
1220 else {
1221 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1222 ExtKind = ZeroExtended;
1223 }
1224 } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1225 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1226 ExtKind = SignExtended;
1227 } else {
1228 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1229 ExtKind = ZeroExtended;
1230 }
11961231 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
11971232 if (!AddRec || AddRec->getLoop() != L)
1198 return nullptr;
1199 return AddRec;
1233 return {nullptr, Unknown};
1234 return {AddRec, ExtKind};
12001235 }
12011236
12021237 /// This IV user cannot be widened. Replace this use of the original narrow IV
12321267 //
12331268 // (A) == icmp slt i32 sext(%narrow), sext(%val)
12341269 // == icmp slt i32 zext(%narrow), sext(%val)
1235
1270 bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
12361271 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
12371272 return false;
12381273
12571292 /// Determine whether an individual user of the narrow IV can be widened. If so,
12581293 /// return the wide clone of the user.
12591294 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1295 assert(ExtendKindMap.count(DU.NarrowDef) &&
1296 "Should already know the kind of extension used to widen NarrowDef");
12601297
12611298 // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
12621299 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
12871324 return nullptr;
12881325 }
12891326 }
1327
1328 // This narrow use can be widened by a sext if it's non-negative or its narrow
1329 // def was widened by a sext. Same for zext.
1330 auto canWidenBySExt = [&]() {
1331 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1332 };
1333 auto canWidenByZExt = [&]() {
1334 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1335 };
1336
12901337 // Our raison d'etre! Eliminate sign and zero extension.
1291 if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
1338 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1339 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
12921340 Value *NewDef = DU.WideDef;
12931341 if (DU.NarrowUse->getType() != WideType) {
12941342 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
13261374 }
13271375
13281376 // Does this user itself evaluate to a recurrence after widening?
1329 const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse);
1330 if (!WideAddRec)
1377 WidenedRecTy WideAddRec = getWideRecurrence(DU);
1378 if (!WideAddRec.first)
13311379 WideAddRec = getExtendedOperandRecurrence(DU);
13321380
1333 if (!WideAddRec) {
1381 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1382 if (!WideAddRec.first) {
13341383 // If use is a loop condition, try to promote the condition instead of
13351384 // truncating the IV first.
13361385 if (widenLoopCompare(DU))
13501399 // Reuse the IV increment that SCEVExpander created as long as it dominates
13511400 // NarrowUse.
13521401 Instruction *WideUse = nullptr;
1353 if (WideAddRec == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1402 if (WideAddRec.first == WideIncExpr &&
1403 Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
13541404 WideUse = WideInc;
13551405 else {
1356 WideUse = cloneIVUser(DU, WideAddRec);
1406 WideUse = cloneIVUser(DU, WideAddRec.first);
13571407 if (!WideUse)
13581408 return nullptr;
13591409 }
13621412 // evaluates to the same expression as the extended narrow use, but doesn't
13631413 // absolutely guarantee it. Hence the following failsafe check. In rare cases
13641414 // where it fails, we simply throw away the newly created wide use.
1365 if (WideAddRec != SE->getSCEV(WideUse)) {
1415 if (WideAddRec.first != SE->getSCEV(WideUse)) {
13661416 DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
1367 << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
1417 << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first << "\n");
13681418 DeadInsts.emplace_back(WideUse);
13691419 return nullptr;
13701420 }
13711421
1422 ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
13721423 // Returning WideUse pushes it on the worklist.
13731424 return WideUse;
13741425 }
14071458 return nullptr;
14081459
14091460 // Widen the induction variable expression.
1410 const SCEV *WideIVExpr = IsSigned ?
1411 SE->getSignExtendExpr(AddRec, WideType) :
1412 SE->getZeroExtendExpr(AddRec, WideType);
1461 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1462 ? SE->getSignExtendExpr(AddRec, WideType)
1463 : SE->getZeroExtendExpr(AddRec, WideType);
14131464
14141465 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
14151466 "Expect the new IV expression to preserve its type");
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -indvars -S | FileCheck %s
2
3 target datalayout = "e-m:e-i64:64-p:64:64:64-n8:16:32:64-S128"
4
5 ; When widening IV and its users, trunc and zext/sext are not needed
6 ; if the original 32-bit user is known to be non-negative, whether
7 ; the IV is considered signed or unsigned.
8 define void @foo(i32* %A, i32* %B, i32* %C, i32 %N) {
9 ; CHECK-LABEL: @foo(
10 ; CHECK-NEXT: entry:
11 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %N
12 ; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
13 ; CHECK: for.body.lr.ph:
14 ; CHECK-NEXT: br label %for.body
15 ; CHECK: for.body:
16 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ]
17 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %B, i64 [[INDVARS_IV]]
18 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
19 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
20 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* %C, i64 [[TMP1]]
21 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
22 ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]]
23 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* %A, i64 [[INDVARS_IV]]
24 ; CHECK-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX5]], align 4
25 ; CHECK-NEXT: br label %for.inc
26 ; CHECK: for.inc:
27 ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
28 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %N to i64
29 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
30 ; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
31 ; CHECK: for.cond.for.end_crit_edge:
32 ; CHECK-NEXT: br label %for.end
33 ; CHECK: for.end:
34 ; CHECK-NEXT: ret void
35 ;
36 entry:
37 %cmp1 = icmp slt i32 0, %N
38 br i1 %cmp1, label %for.body.lr.ph, label %for.end
39
40 for.body.lr.ph: ; preds = %entry
41 br label %for.body
42
43 for.body: ; preds = %for.body.lr.ph, %for.inc
44 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
45 %idxprom = sext i32 %i.02 to i64
46 %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
47 %0 = load i32, i32* %arrayidx, align 4
48 %add = add nsw i32 %i.02, 2
49 %idxprom1 = zext i32 %add to i64
50 %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
51 %1 = load i32, i32* %arrayidx2, align 4
52 %add3 = add nsw i32 %0, %1
53 %idxprom4 = zext i32 %i.02 to i64
54 %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
55 store i32 %add3, i32* %arrayidx5, align 4
56 br label %for.inc
57
58 for.inc: ; preds = %for.body
59 %inc = add nsw i32 %i.02, 1
60 %cmp = icmp slt i32 %inc, %N
61 br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
62
63 for.cond.for.end_crit_edge: ; preds = %for.inc
64 br label %for.end
65
66 for.end: ; preds = %for.cond.for.end_crit_edge, %entry
67 ret void
68 }
69
70 define void @foo1(i32* %A, i32* %B, i32* %C, i32 %N) {
71 ; CHECK-LABEL: @foo1(
72 ; CHECK-NEXT: entry:
73 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %N
74 ; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
75 ; CHECK: for.body.lr.ph:
76 ; CHECK-NEXT: br label %for.body
77 ; CHECK: for.body:
78 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ]
79 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %B, i64 [[INDVARS_IV]]
80 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
81 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
82 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* %C, i64 [[TMP1]]
83 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
84 ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]]
85 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* %A, i64 [[INDVARS_IV]]
86 ; CHECK-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX5]], align 4
87 ; CHECK-NEXT: br label %for.inc
88 ; CHECK: for.inc:
89 ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
90 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %N to i64
91 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
92 ; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
93 ; CHECK: for.cond.for.end_crit_edge:
94 ; CHECK-NEXT: br label %for.end
95 ; CHECK: for.end:
96 ; CHECK-NEXT: ret void
97 ;
98 entry:
99 %cmp1 = icmp slt i32 0, %N
100 br i1 %cmp1, label %for.body.lr.ph, label %for.end
101
102 for.body.lr.ph: ; preds = %entry
103 br label %for.body
104
105 for.body: ; preds = %for.body.lr.ph, %for.inc
106 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
107 %idxprom = zext i32 %i.02 to i64
108 %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
109 %0 = load i32, i32* %arrayidx, align 4
110 %add = add nsw i32 %i.02, 2
111 %idxprom1 = sext i32 %add to i64
112 %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
113 %1 = load i32, i32* %arrayidx2, align 4
114 %add3 = add nsw i32 %0, %1
115 %idxprom4 = sext i32 %i.02 to i64
116 %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
117 store i32 %add3, i32* %arrayidx5, align 4
118 br label %for.inc
119
120 for.inc: ; preds = %for.body
121 %inc = add nsw i32 %i.02, 1
122 %cmp = icmp slt i32 %inc, %N
123 br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
124
125 for.cond.for.end_crit_edge: ; preds = %for.inc
126 br label %for.end
127
128 for.end: ; preds = %for.cond.for.end_crit_edge, %entry
129 ret void
130 }
131
132
133 @a = common global [100 x i32] zeroinitializer, align 16
134 @b = common global [100 x i32] zeroinitializer, align 16
135
136 define i32 @foo2(i32 %M) {
137 ; CHECK-LABEL: @foo2(
138 ; CHECK-NEXT: entry:
139 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %M
140 ; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
141 ; CHECK: for.body.lr.ph:
142 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 %M to i64
143 ; CHECK-NEXT: br label %for.body
144 ; CHECK: for.body:
145 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ]
146 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
147 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
148 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 [[INDVARS_IV]]
149 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
150 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
151 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
152 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[TMP3]]
153 ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX5]], align 4
154 ; CHECK-NEXT: br label %for.inc
155 ; CHECK: for.inc:
156 ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
157 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %M to i64
158 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
159 ; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
160 ; CHECK: for.cond.for.end_crit_edge:
161 ; CHECK-NEXT: br label %for.end
162 ; CHECK: for.end:
163 ; CHECK-NEXT: [[CALL:%.*]] = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
164 ; CHECK-NEXT: ret i32 0
165 ;
166 entry:
167 %cmp1 = icmp slt i32 0, %M
168 br i1 %cmp1, label %for.body.lr.ph, label %for.end
169
170 for.body.lr.ph: ; preds = %entry
171 br label %for.body
172
173 for.body: ; preds = %for.body.lr.ph, %for.inc
174 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
175 %idxprom = zext i32 %i.02 to i64
176 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom
177 %0 = load i32, i32* %arrayidx, align 4
178 %idxprom1 = sext i32 %i.02 to i64
179 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 %idxprom1
180 %1 = load i32, i32* %arrayidx2, align 4
181 %add = add nsw i32 %0, %1
182 %add3 = add nsw i32 %i.02, %M
183 %idxprom4 = sext i32 %add3 to i64
184 %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom4
185 store i32 %add, i32* %arrayidx5, align 4
186 br label %for.inc
187
188 for.inc: ; preds = %for.body
189 %inc = add nsw i32 %i.02, 1
190 %cmp = icmp slt i32 %inc, %M
191 br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
192
193 for.cond.for.end_crit_edge: ; preds = %for.inc
194 br label %for.end
195
196 for.end: ; preds = %for.cond.for.end_crit_edge, %entry
197 %call = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
198 ret i32 0
199 }
200
201 declare i32 @dummy(i32*, i32*)
202
203 ; A case where zext should not be eliminated when its operands could only be extended by sext.
204 define i32 @foo3(i32 %M) {
205 ; CHECK-LABEL: @foo3(
206 ; CHECK-NEXT: entry:
207 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %M
208 ; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
209 ; CHECK: for.body.lr.ph:
210 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 %M to i64
211 ; CHECK-NEXT: br label %for.body
212 ; CHECK: for.body:
213 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ]
214 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
215 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
216 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 [[INDVARS_IV]]
217 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
218 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
219 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
220 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
221 ; CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP4]] to i64
222 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[IDXPROM4]]
223 ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX5]], align 4
224 ; CHECK-NEXT: br label %for.inc
225 ; CHECK: for.inc:
226 ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
227 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %M to i64
228 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
229 ; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
230 ; CHECK: for.cond.for.end_crit_edge:
231 ; CHECK-NEXT: br label %for.end
232 ; CHECK: for.end:
233 ; CHECK-NEXT: [[CALL:%.*]] = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
234 ; CHECK-NEXT: ret i32 0
235 ;
236 entry:
237 %cmp1 = icmp slt i32 0, %M
238 br i1 %cmp1, label %for.body.lr.ph, label %for.end
239
240 for.body.lr.ph: ; preds = %entry
241 br label %for.body
242
243 for.body: ; preds = %for.body.lr.ph, %for.inc
244 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
245 %idxprom = sext i32 %i.02 to i64
246 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom
247 %0 = load i32, i32* %arrayidx, align 4
248 %idxprom1 = sext i32 %i.02 to i64
249 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 %idxprom1
250 %1 = load i32, i32* %arrayidx2, align 4
251 %add = add nsw i32 %0, %1
252 %add3 = add nsw i32 %i.02, %M
253 %idxprom4 = zext i32 %add3 to i64
254 %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom4
255 store i32 %add, i32* %arrayidx5, align 4
256 br label %for.inc
257
258 for.inc: ; preds = %for.body
259 %inc = add nsw i32 %i.02, 1
260 %cmp = icmp slt i32 %inc, %M
261 br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
262
263 for.cond.for.end_crit_edge: ; preds = %for.inc
264 br label %for.end
265
266 for.end: ; preds = %for.cond.for.end_crit_edge, %entry
267 %call = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
268 ret i32 0
269 }
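For contrast, a hypothetical C-level reading of @foo3 above (again illustration only, not part of the commit): i + M may be negative, so zext(i + M) is not equivalent to sext(i + M). The zero-extended index therefore cannot be folded into the sign-extended widened IV expression, and the trunc/zext pair survives, exactly as the CHECK lines for @foo3 require.

// Hypothetical source roughly matching @foo3: the unsigned (zext) store index
// cannot be absorbed by the widened IV because i + M can be negative.
extern int a[100], b[100];
int dummy(int *, int *);
int foo3(int M) {
  for (int i = 0; i < M; i++)
    a[(unsigned)(i + M)] = a[i] + b[i];
  dummy(a, b);
  return 0;
}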