llvm.org GIT mirror llvm / 600f7b5
Revert "[SCEV] Use wrap flags in InsertBinop" This reverts commit r362687. Miscompiles llvm-profdata during selfhost. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362699 91177308-0d34-0410-b5e6-96231b3b80d8 Benjamin Kramer 4 months ago
28 changed file(s) with 70 addition(s) and 83 deletion(s). Raw diff Collapse all Expand all
317317 /// avoid inserting an obviously redundant operation, and hoisting to an
318318 /// outer loop when the opportunity is there and it is safe.
319319 Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
320 SCEV::NoWrapFlags Flags, bool IsSafeToHoist);
320 bool IsSafeToHoist);
321321
322322 /// Arrange for there to be a cast of V to Ty at IP, reusing an existing
323323 /// cast if a suitable one exists, moving an existing cast if a suitable one
168168 /// of work to avoid inserting an obviously redundant operation, and hoisting
169169 /// to an outer loop when the opportunity is there and it is safe.
170170 Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
171 Value *LHS, Value *RHS,
172 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
171 Value *LHS, Value *RHS, bool IsSafeToHoist) {
173172 // Fold a binop with constant operands.
174173 if (Constant *CLHS = dyn_cast(LHS))
175174 if (Constant *CRHS = dyn_cast(RHS))
188187 if (isa(IP))
189188 ScanLimit++;
190189
191 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
192 // Ensure that no-wrap flags match.
193 if (isa(I)) {
194 if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
195 return true;
196 if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
197 return true;
198 }
199 // Conservatively, do not use any instruction which has the exact
200 // flag installed.
190 // Conservatively, do not use any instruction which has any of the wrap/exact
191 // flags installed.
192 // TODO: Instead of simply disabling poison instructions, we could be clever
193 // here and match SCEV to this instruction.
194 auto canGeneratePoison = [](Instruction *I) {
195 if (isa(I) &&
196 (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
197 return true;
201198 if (isa(I) && I->isExact())
202199 return true;
203200 return false;
204201 };
205202 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
206 IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
203 IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
207204 return &*IP;
208205 if (IP == BlockBegin) break;
209206 }
228225 // If we haven't found this binop, insert it.
229226 Instruction *BO = cast(Builder.CreateBinOp(Opcode, LHS, RHS));
230227 BO->setDebugLoc(Loc);
231 if (Flags & SCEV::FlagNUW)
232 BO->setHasNoUnsignedWrap();
233 if (Flags & SCEV::FlagNSW)
234 BO->setHasNoSignedWrap();
235228 rememberInstruction(BO);
236229
237230 return BO;
743736 // Instead of doing a negate and add, just do a subtract.
744737 Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
745738 Sum = InsertNoopCastOfTo(Sum, Ty);
746 Sum = InsertBinop(Instruction::Sub, Sum, W, S->getNoWrapFlags(),
747 /*IsSafeToHoist*/ true);
739 Sum = InsertBinop(Instruction::Sub, Sum, W, /*IsSafeToHoist*/ true);
748740 ++I;
749741 } else {
750742 // A simple add.
752744 Sum = InsertNoopCastOfTo(Sum, Ty);
753745 // Canonicalize a constant to the RHS.
754746 if (isa(Sum)) std::swap(Sum, W);
755 Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
756 /*IsSafeToHoist*/ true);
747 Sum = InsertBinop(Instruction::Add, Sum, W, /*IsSafeToHoist*/ true);
757748 ++I;
758749 }
759750 }
782773 // Expand the calculation of X pow N in the following manner:
783774 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
784775 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
785 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty, &S]() {
776 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
786777 auto E = I;
787778 // Calculate how many times the same operand from the same loop is included
788779 // into this power.
805796 if (Exponent & 1)
806797 Result = P;
807798 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
808 P = InsertBinop(Instruction::Mul, P, P, S->getNoWrapFlags(),
809 /*IsSafeToHoist*/ true);
799 P = InsertBinop(Instruction::Mul, P, P, /*IsSafeToHoist*/ true);
810800 if (Exponent & BinExp)
811801 Result = Result ? InsertBinop(Instruction::Mul, Result, P,
812 S->getNoWrapFlags(),
813802 /*IsSafeToHoist*/ true)
814803 : P;
815804 }
827816 // Instead of doing a multiply by negative one, just do a negate.
828817 Prod = InsertNoopCastOfTo(Prod, Ty);
829818 Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
830 S->getNoWrapFlags(),
831819 /*IsSafeToHoist*/ true);
832820 ++I;
833821 } else {
842830 assert(!Ty->isVectorTy() && "vector types are not SCEVable");
843831 Prod = InsertBinop(Instruction::Shl, Prod,
844832 ConstantInt::get(Ty, RHS->logBase2()),
845 S->getNoWrapFlags(), /*IsSafeToHoist*/ true);
833 /*IsSafeToHoist*/ true);
846834 } else {
847 Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
848 /*IsSafeToHoist*/ true);
835 Prod = InsertBinop(Instruction::Mul, Prod, W, /*IsSafeToHoist*/ true);
849836 }
850837 }
851838 }
862849 if (RHS.isPowerOf2())
863850 return InsertBinop(Instruction::LShr, LHS,
864851 ConstantInt::get(Ty, RHS.logBase2()),
865 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
852 /*IsSafeToHoist*/ true);
866853 }
867854
868855 Value *RHS = expandCodeFor(S->getRHS(), Ty);
869 return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
856 return InsertBinop(Instruction::UDiv, LHS, RHS,
870857 /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
871858 }
872859
22 ; Make sure that we generate correct runtime checks.
33
44 ; CHECK: b7.old:
5 ; CHECK: [[LEN:%[0-9]+]] = shl nuw i32 %len, 3
5 ; CHECK: [[LEN:%[0-9]+]] = shl i32 %len, 3
66 ; CHECK: [[SRC:%[0-9]+]] = ptrtoint i8* %src to i32
77 ; CHECK: [[DST:%[0-9]+]] = ptrtoint i8* %dst to i32
88 ; CHECK: [[ULT:%[0-9]+]] = icmp ult i32 [[DST]], [[SRC]]
8888 ; CHECK: entry:
8989 ; CHECK-NEXT: %num = load i64, i64* %p1, align 4
9090 ; CHECK-NEXT: [[DIV:%[^ ]+]] = udiv i64 %num, 13
91 ; CHECK-NEXT: [[DIV_MINUS_1:%[^ ]+]] = add nsw i64 [[DIV]], -1
91 ; CHECK-NEXT: [[DIV_MINUS_1:%[^ ]+]] = add i64 [[DIV]], -1
9292 ; CHECK-NEXT: [[COMP1:%[^ ]+]] = icmp sgt i64 [[DIV_MINUS_1]], 0
9393 ; CHECK-NEXT: %exit.mainloop.at = select i1 [[COMP1]], i64 [[DIV_MINUS_1]], i64 0
9494 ; CHECK-NEXT: [[COMP2:%[^ ]+]] = icmp slt i64 0, %exit.mainloop.at
44 ; CHECK-LABEL: @f_0(
55
66 ; CHECK: loop.preheader:
7 ; CHECK: [[len_sub:[^ ]+]] = add nsw i32 %len, -4
7 ; CHECK: [[len_sub:[^ ]+]] = add i32 %len, -4
88 ; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[len_sub]]
99 ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[len_sub]]
1010 ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
3232 ; CHECK: [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n
3333 ; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0
3434 ; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0
35 ; CHECK: %exit.preloop.at = add nsw i32 [[not_exit_preloop_at]], -1
35 ; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1
3636 }
3737
3838 ; Make sure that we can eliminate the range check when the loop looks like:
2222 ; CHECK-NOT: preloop
2323 ; CHECK: entry:
2424 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
25 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nsw i32 %len, -13
25 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13
2626 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
2727 ; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101
2828 ; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0
7878 ; CHECK-LABEL: test_02(
7979 ; CHECK: entry:
8080 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
81 ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add nuw nsw i32 %len, -2147483647
81 ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
8282 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
8383 ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
84 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub nuw nsw i32 %len, [[SMAX1]]
84 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]]
8585 ; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
8686 ; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101
8787 ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0
201201 ; CHECK-LABEL: test_04(
202202 ; CHECK: entry:
203203 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
204 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nuw i32 %len, 13
204 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13
205205 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101
206206 ; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101
207207 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader
244244 ; CHECK-NOT: preloop
245245 ; CHECK: entry:
246246 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
247 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nsw i32 %len, -13
247 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13
248248 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
249249 ; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101
250250 ; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0
285285 ; CHECK-LABEL: test_06(
286286 ; CHECK: entry:
287287 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
288 ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add nuw nsw i32 %len, -2147483647
288 ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
289289 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
290290 ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
291 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub nuw nsw i32 %len, [[SMAX1]]
291 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]]
292292 ; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
293293 ; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101
294294 ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0
374374 ; CHECK-LABEL: test_08(
375375 ; CHECK: entry:
376376 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
377 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nuw i32 %len, 13
377 ; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13
378378 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101
379379 ; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101
380380 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader
114114 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
115115 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
116116 ; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
117 ; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 [[BOUND]], [[SMIN]]
117 ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]]
118118 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0
119119 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0
120120 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1
402402 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
403403 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
404404 ; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
405 ; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 [[BOUND]], [[SMIN]]
405 ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]]
406406 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0
407407 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0
408408 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1
8585 ; CHECK-LABEL: @single_access_no_preloop_with_offset(
8686
8787 ; CHECK: loop.preheader:
88 ; CHECK: [[safe_range_end:[^ ]+]] = add nsw i32 %len, -4
88 ; CHECK: [[safe_range_end:[^ ]+]] = add i32 %len, -4
8989 ; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]]
9090 ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]]
9191 ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
3333 ; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647
3434 ; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647
3535 ; If Offset were SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
36 ; CHECK: [[safe_start:[^ ]+]] = sub nsw i32 0, [[safe_offset_preloop]]
36 ; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]]
3737 ; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]]
3838 ; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]]
3939 ; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0
4040 ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0
4141
4242
43 ; CHECK: [[len_minus_sint_max:[^ ]+]] = add nuw nsw i32 %len, -2147483647
43 ; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647
4444 ; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]]
4545 ; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]]
4646 ; If Offset were SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
47 ; CHECK: [[safe_upper_end:[^ ]+]] = sub nsw i32 %len, [[safe_offset_mainloop]]
47 ; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]]
4848 ; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]]
4949 ; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]]
5050 ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0
5151 ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0
52 ; CHECK: [[safe_lower_end:[^ ]+]] = sub nsw i32 2147483647, [[safe_offset_mainloop_2]]
52 ; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]]
5353 ; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]]
5454 ; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]]
5555 ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0
253253 ; CHECK: @test_05(
254254 ; CHECK: entry:
255255 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr
256 ; CHECK-NEXT: %exit.preloop.at = add nsw i32 %len, -1
256 ; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1
257257 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp sgt i32 100, %exit.preloop.at
258258 ; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit
259259 ; CHECK: loop.preloop.preheader:
319319 ; CHECK: @test_06(
320320 ; CHECK: entry:
321321 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr
322 ; CHECK-NEXT: %exit.preloop.at = add nsw i32 %len, -1
322 ; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1
323323 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 2147483640, %exit.preloop.at
324324 ; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit
325325 ; CHECK: loop.preloop.preheader:
414414 ; CHECK: @test_08(
415415 ; CHECK: entry:
416416 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr
417 ; CHECK-NEXT: %exit.preloop.at = add nsw i32 %len, -1
417 ; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1
418418 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 2147483647, %exit.preloop.at
419419 ; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit
420420 ; CHECK: loop.preloop.preheader:
5858 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
5959 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
6060 ; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
61 ; CHECK-NEXT: %exit.preloop.at = add nsw i32 [[UMIN]], -1
61 ; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1
6262 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at
6363 ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
6464 ; CHECK: mainloop:
149149 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
150150 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
151151 ; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
152 ; CHECK-NEXT: %exit.preloop.at = add nsw i32 [[UMIN]], -1
152 ; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1
153153 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at
154154 ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
155155 ; CHECK: mainloop:
6161 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
6262 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
6363 ; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
64 ; CHECK-NEXT: %exit.preloop.at = add nsw i32 [[UMIN]], -1
64 ; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1
6565 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at
6666 ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
6767 ; CHECK: mainloop:
194194 ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
195195 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
196196 ; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
197 ; CHECK-NEXT: %exit.preloop.at = add nsw i32 [[UMIN]], -1
197 ; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1
198198 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at
199199 ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
200200 ; CHECK: mainloop:
201201 ; CHECK: loop.preheader:
202202 ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N]], 3
203203 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[TMP0]], 2
204 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP1]], 1
204 ; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], 1
205205 ; CHECK-NEXT: br label [[LOOP:%.*]]
206206 ; CHECK: loop:
207207 ; CHECK-NEXT: [[I1:%.*]] = phi i8 [ [[I1_INC:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
2424 ; CHECK-LABEL: @test2
2525 ; CHECK: [[VAR1:%.+]] = add i32 %arg, -11
2626 ; CHECK: [[VAR2:%.+]] = lshr i32 [[VAR1]], 1
27 ; CHECK: [[VAR3:%.+]] = add nuw i32 [[VAR2]], 1
27 ; CHECK: [[VAR3:%.+]] = add i32 [[VAR2]], 1
2828 ; CHECK: [[VAR4:%.+]] = phi i32 [ 0, %bb ], [ [[VAR3]], %bb1.preheader ]
2929 ; CHECK: ret i32 [[VAR4]]
3030 define i32 @test2(i32 %arg) {
281281 ;; memcpy.atomic formation (atomic load & store) -- element size 2
282282 define void @test6(i64 %Size) nounwind ssp {
283283 ; CHECK-LABEL: @test6(
284 ; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 1
284 ; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 1
285285 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 [[Sz]], i32 2)
286286 ; CHECK-NOT: store
287287 ; CHECK: ret void
307307 ;; memcpy.atomic formation (atomic load & store) -- element size 4
308308 define void @test7(i64 %Size) nounwind ssp {
309309 ; CHECK-LABEL: @test7(
310 ; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 2
310 ; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 2
311311 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 [[Sz]], i32 4)
312312 ; CHECK-NOT: store
313313 ; CHECK: ret void
333333 ;; memcpy.atomic formation (atomic load & store) -- element size 8
334334 define void @test8(i64 %Size) nounwind ssp {
335335 ; CHECK-LABEL: @test8(
336 ; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 3
336 ; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 3
337337 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 [[Sz]], i32 8)
338338 ; CHECK-NOT: store
339339 ; CHECK: ret void
359359 ;; memcpy.atomic formation rejection (atomic load & store) -- element size 16
360360 define void @test9(i64 %Size) nounwind ssp {
361361 ; CHECK-LABEL: @test9(
362 ; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 4
362 ; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 4
363363 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 [[Sz]], i32 16)
364364 ; CHECK-NOT: store
365365 ; CHECK: ret void
4545 ret void
4646 ; CHECK-LABEL: @test1_i16(
4747 ; CHECK: %[[BaseBC:.*]] = bitcast i16* %Base to i8*
48 ; CHECK: %[[Sz:[0-9]+]] = shl nuw i64 %Size, 1
48 ; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 1
4949 ; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %[[BaseBC]], i8 0, i64 %[[Sz]], i1 false)
5050 ; CHECK-NOT: store
5151 }
9191 ret void
9292 ; CHECK-LABEL: @test2(
9393 ; CHECK: br i1 %cmp10,
94 ; CHECK: %0 = shl nuw i64 %Size, 2
94 ; CHECK: %0 = shl i64 %Size, 2
9595 ; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %Base1, i8 1, i64 %0, i1 false)
9696 ; CHECK-NOT: store
9797 }
211211 ; CHECK-LABEL: @test6_dest_align(
212212 ; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
213213 ; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
214 ; CHECK: %[[Sz:[0-9]+]] = shl nuw i64 %Size, 2
214 ; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
215215 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[Dst]], i8* align 1 %[[Src]], i64 %[[Sz]], i1 false)
216216 ; CHECK-NOT: store
217217 ; CHECK: ret void
237237 ; CHECK-LABEL: @test6_src_align(
238238 ; CHECK: %[[Dst]] = bitcast i32* %Dest to i8*
239239 ; CHECK: %[[Src]] = bitcast i32* %Base to i8*
240 ; CHECK: %[[Sz:[0-9]+]] = shl nuw i64 %Size, 2
240 ; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
241241 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %[[Dst]], i8* align 4 %[[Src]], i64 %[[Sz]], i1 false)
242242 ; CHECK-NOT: store
243243 ; CHECK: ret void
652652 br label %loop.body
653653 ; CHECK: loop.ph:
654654 ; CHECK-NEXT: %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
655 ; CHECK-NEXT: %[[SCALED_SIZE:.*]] = shl nuw nsw i64 %[[ZEXT_SIZE]], 3
655 ; CHECK-NEXT: %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
656656 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %{{.*}}, i8 0, i64 %[[SCALED_SIZE]], i1 false)
657657
658658 loop.body:
684684 br label %loop.body
685685 ; CHECK: loop.ph:
686686 ; CHECK-NEXT: %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
687 ; CHECK-NEXT: %[[SCALED_SIZE:.*]] = shl nuw nsw i64 %[[ZEXT_SIZE]], 3
687 ; CHECK-NEXT: %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
688688 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 %[[SCALED_SIZE]], i1 false)
689689
690690 loop.body:
1212 ; CHECK-NEXT: bb.nph:
1313 ; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
1414 ; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
15 ; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2, !dbg !18
15 ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE:%.*]], 2, !dbg !18
1616 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST1]], i8* align 1 [[BASE2]], i64 [[TMP0]], i1 false), !dbg !19
1717 ; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !18
1818 ; CHECK: for.body:
744744 ; CHECK-LABEL: @pointer_bitcast_baseinst(
745745 ; CHECK: bb3:
746746 ; CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ]
747 ; CHECK-NEXT: %4 = shl nuw i64 %indvar, 3
747 ; CHECK-NEXT: %4 = shl i64 %indvar, 3
748748 ; CHECK-NEXT: %5 = add i64 %4, 1
749749 ; CHECK-NEXT: %tmp5 = shl nuw i64 %5, 1
750750 ; CHECK-NEXT: %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5
103103 ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
104104 ;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
105105 ;CHECK-NEXT: %4 = trunc i64 %indvar to i32
106 ;CHECK-NEXT: %5 = mul nsw i64 %indvar, -1
106 ;CHECK-NEXT: %5 = mul i64 %indvar, -1
107107 ;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %5
108108 ;CHECK-NEXT: %6 = load i32, i32* %scevgep, align 4
109109 ;CHECK-NEXT: %add = add nsw i32 %6, %S.012
5151 ; CHECK: %0 = add i32 %n, -1
5252 ; CHECK: %1 = sub i32 %0, %m
5353 ; CHECK: %2 = lshr i32 %1, 2
54 ; CHECK: %3 = shl nuw i32 %2, 2
54 ; CHECK: %3 = shl i32 %2, 2
5555 ; CHECK: %4 = add i32 %3, 3
5656 ; CHECK: br label %for.body
5757
130130 ; CHECK: %0 = add i32 %n, -1
131131 ; CHECK: %1 = sub i32 %0, %rem
132132 ; CHECK: %2 = lshr i32 %1, 2
133 ; CHECK: %3 = shl nuw i32 %2, 2
133 ; CHECK: %3 = shl i32 %2, 2
134134 ; CHECK: %4 = add i32 %3, 3
135135 ; CHECK: br label %for.body
136136
5151 ;CHECK-LABEL: while.body:
5252 ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
5353 ;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
54 ;CHECK-NEXT: %4 = mul nsw i64 %indvar, -1
54 ;CHECK-NEXT: %4 = mul i64 %indvar, -1
5555 ;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %4
5656 ;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4
5757 ;CHECK-NEXT: %add = add nsw i32 %5, %S.011
1111 ; CHECK-LABEL: @test(
1212 ; multiplies are hoisted out of the loop
1313 ; CHECK: while.body.lr.ph:
14 ; CHECK: shl nsw i64
15 ; CHECK: shl nsw i64
14 ; CHECK: shl i64
15 ; CHECK: shl i64
1616 ; GEPs are ugly
1717 ; CHECK: while.body:
1818 ; CHECK: phi
2828 ; CHECK-LABEL: for.end:
2929 ; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next, 0
3030 ; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
31 ; CHECK: %0 = sub nuw nsw i32 0, %sub.us
31 ; CHECK: %0 = sub i32 0, %sub.us
3232 ; CHECK: %1 = sub i32 %0, %lsr.iv.next
3333 ; CHECK: %sext.us = mul i32 %lsr.iv.next2, %1
3434 ; CHECK: %f = ashr i32 %sext.us, 24
1313 ; CHECK-NEXT: [[CMP215:%.*]] = icmp sgt i32 [[SIZE:%.*]], 1
1414 ; CHECK-NEXT: [[T0:%.*]] = zext i32 [[SIZE]] to i64
1515 ; CHECK-NEXT: [[T1:%.*]] = sext i32 [[NSTEPS:%.*]] to i64
16 ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[T0]], -1
16 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[T0]], -1
1717 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1818 ; CHECK: for.body:
1919 ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[FOR_INC:%.*]] ], [ 1, [[ENTRY:%.*]] ]
77 ; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1
88 ; CHECK: [[r3:%[a-z0-9\.]+]] = bitcast i64 [[r2]] to i64
99 ; CHECK: for.body.lr.ph:
10 ; CHECK: [[r4:%[a-z0-9]+]] = shl nuw i64 [[r3]], 1
10 ; CHECK: [[r4:%[a-z0-9]+]] = shl i64 [[r3]], 1
1111 ; CHECK: br label %for.body
1212 ; CHECK: for.body:
1313 ; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r4]], %for.body.lr.ph ]
2828 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4
2929 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3030 ; CHECK: vector.scevcheck:
31 ; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i64 3, [[TMP0]]
31 ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 3, [[TMP0]]
3232 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[G_0]], [[CONV]]
3333 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP2]] to i32
3434 ; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP4]])
2626 ; CHECK: for.body3.lr.ph.us.preheader:
2727 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], -1
2828 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
29 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
29 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 1
3030 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[K:%.*]] to i64
3131 ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]]
3232 ; CHECK: for.end.us: