llvm.org GIT mirror llvm / 0498bd2
[SCEV] Apply NSW and NUW flags via poison value analysis for sub, mul and shl Summary: http://reviews.llvm.org/D11212 made Scalar Evolution able to propagate NSW and NUW flags from instructions to SCEVs for add instructions. This patch expands that to sub, mul and shl instructions. This change makes LSR able to generate pointer induction variables for loops like these, where the index is 32 bit and the pointer is 64 bit: for (int i = 0; i < numIterations; ++i) sum += ptr[i - offset]; for (int i = 0; i < numIterations; ++i) sum += ptr[i * stride]; for (int i = 0; i < numIterations; ++i) sum += ptr[3 * (i << 7)]; Reviewers: atrick, sanjoy Subscribers: sanjoy, majnemer, hfinkel, llvm-commits, meheff, jingyue, eliben Differential Revision: http://reviews.llvm.org/D11860 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245118 91177308-0d34-0410-b5e6-96231b3b80d8 Bjarke Hammersholt Roune 5 years ago
6 changed file(s) with 423 addition(s) and 39 deletion(s). Raw diff Collapse all Expand all
711711
712712 /// getNegativeSCEV - Return the SCEV object corresponding to -V.
713713 ///
714 const SCEV *getNegativeSCEV(const SCEV *V);
714 const SCEV *getNegativeSCEV(const SCEV *V,
715 SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
715716
716717 /// getNotSCEV - Return the SCEV object corresponding to ~V.
717718 ///
33383338
33393339 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
33403340 ///
3341 const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
3341 const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
3342 SCEV::NoWrapFlags Flags) {
33423343 if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
33433344 return getConstant(
33443345 cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
33453346
33463347 Type *Ty = V->getType();
33473348 Ty = getEffectiveSCEVType(Ty);
3348 return getMulExpr(V,
3349 getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
3349 return getMulExpr(
3350 V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
33503351 }
33513352
33523353 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
33653366 /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
33663367 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
33673368 SCEV::NoWrapFlags Flags) {
3368 assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
3369
33703369 // Fast path: X - X --> 0.
33713370 if (LHS == RHS)
33723371 return getConstant(LHS->getType(), 0);
33733372
3374 // X - Y --> X + -Y.
3375 // X -(nsw || nuw) Y --> X + -Y.
3376 return getAddExpr(LHS, getNegativeSCEV(RHS));
3373 // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
3374 // makes it so that we cannot make much use of NUW.
3375 auto AddFlags = SCEV::FlagAnyWrap;
3376 const bool RHSIsNotMinSigned =
3377 !getSignedRange(RHS).getSignedMin().isMinSignedValue();
3378 if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
3379 // Let M be the minimum representable signed value. Then (-1)*RHS
3380 // signed-wraps if and only if RHS is M. That can happen even for
3381 // a NSW subtraction because e.g. (-1)*M signed-wraps even though
3382 // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
3383 // (-1)*RHS, we need to prove that RHS != M.
3384 //
3385 // If LHS is non-negative and we know that LHS - RHS does not
3386 // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
3387 // either by proving that RHS > M or that LHS >= 0.
3388 if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
3389 AddFlags = SCEV::FlagNSW;
3390 }
3391 }
3392
3393 // FIXME: Find a correct way to transfer NSW to (-1)*RHS when LHS -
3394 // RHS is NSW and LHS >= 0.
3395 //
3396 // The difficulty here is that the NSW flag may have been proven
3397 // relative to a loop that is to be found in a recurrence in LHS and
3398 // not in RHS. Applying NSW to (-1)*RHS may then let the NSW have a
3399 // larger scope than intended.
3400 auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
3401
3402 return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
33773403 }
33783404
33793405 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
40934119 }
40944120
40954121 SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
4122 if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
40964123 const BinaryOperator *BinOp = cast<BinaryOperator>(V);
40974124
40984125 // Return early if there are no flags to propagate to the SCEV.
41844211 // because it leads to N-1 getAddExpr calls for N ultimate operands.
41854212 // Instead, gather up all the operands and make a single getAddExpr call.
41864213 // LLVM IR canonical form means we need only traverse the left operands.
4187 //
4188 // FIXME: Expand this handling of NSW and NUW to other instructions, like
4189 // sub and mul.
41904214 SmallVector<const SCEV *, 4> AddOps;
41914215 for (Value *Op = U;; Op = U->getOperand(0)) {
41924216 U = dyn_cast<Operator>(Op);
41974221 break;
41984222 }
41994223
4200 if (auto *OpSCEV = getExistingSCEV(Op)) {
4224 if (auto *OpSCEV = getExistingSCEV(U)) {
42014225 AddOps.push_back(OpSCEV);
42024226 break;
42034227 }
42094233 // since the flags are only known to apply to this particular
42104234 // addition - they may not apply to other additions that can be
42114235 // formed with operands from AddOps.
4212 //
4213 // FIXME: Expand this to sub instructions.
4214 if (Opcode == Instruction::Add && isa<BinaryOperator>(U)) {
4215 SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
4216 if (Flags != SCEV::FlagAnyWrap) {
4217 AddOps.push_back(getAddExpr(getSCEV(U->getOperand(0)),
4218 getSCEV(U->getOperand(1)), Flags));
4219 break;
4220 }
4221 }
4222
4223 const SCEV *Op1 = getSCEV(U->getOperand(1));
4236 const SCEV *RHS = getSCEV(U->getOperand(1));
4237 SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
4238 if (Flags != SCEV::FlagAnyWrap) {
4239 const SCEV *LHS = getSCEV(U->getOperand(0));
4240 if (Opcode == Instruction::Sub)
4241 AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
4242 else
4243 AddOps.push_back(getAddExpr(LHS, RHS, Flags));
4244 break;
4245 }
4246
42244247 if (Opcode == Instruction::Sub)
4225 AddOps.push_back(getNegativeSCEV(Op1));
4248 AddOps.push_back(getNegativeSCEV(RHS));
42264249 else
4227 AddOps.push_back(Op1);
4250 AddOps.push_back(RHS);
42284251 }
42294252 return getAddExpr(AddOps);
42304253 }
42314254
42324255 case Instruction::Mul: {
4233 // FIXME: Transfer NSW/NUW as in AddExpr.
42344256 SmallVector<const SCEV *, 4> MulOps;
4235 MulOps.push_back(getSCEV(U->getOperand(1)));
4236 for (Value *Op = U->getOperand(0);
4237 Op->getValueID() == Instruction::Mul + Value::InstructionVal;
4238 Op = U->getOperand(0)) {
4239 U = cast<Operator>(Op);
4257 for (Value *Op = U;; Op = U->getOperand(0)) {
4258 U = dyn_cast<Operator>(Op);
4259 if (!U || U->getOpcode() != Instruction::Mul) {
4260 assert(Op != V && "V should be a mul");
4261 MulOps.push_back(getSCEV(Op));
4262 break;
4263 }
4264
4265 if (auto *OpSCEV = getExistingSCEV(U)) {
4266 MulOps.push_back(OpSCEV);
4267 break;
4268 }
4269
4270 SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
4271 if (Flags != SCEV::FlagAnyWrap) {
4272 MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
4273 getSCEV(U->getOperand(1)), Flags));
4274 break;
4275 }
4276
42404277 MulOps.push_back(getSCEV(U->getOperand(1)));
42414278 }
4242 MulOps.push_back(getSCEV(U->getOperand(0)));
42434279 return getMulExpr(MulOps);
42444280 }
42454281 case Instruction::UDiv:
42464282 return getUDivExpr(getSCEV(U->getOperand(0)),
42474283 getSCEV(U->getOperand(1)));
42484284 case Instruction::Sub:
4249 return getMinusSCEV(getSCEV(U->getOperand(0)),
4250 getSCEV(U->getOperand(1)));
4285 return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
4286 getNoWrapFlagsFromUB(U));
42514287 case Instruction::And:
42524288 // For an expression like x&255 that merely masks off the high bits,
42534289 // use zext(trunc(x)) as the SCEV expression.
43674403 if (SA->getValue().uge(BitWidth))
43684404 break;
43694405
4406 // It is currently not resolved how to interpret NSW for left
4407 // shift by BitWidth - 1, so we avoid applying flags in that
4408 // case. Remove this check (or this comment) once the situation
4409 // is resolved. See
4410 // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
4411 // and http://reviews.llvm.org/D8890 .
4412 auto Flags = SCEV::FlagAnyWrap;
4413 if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
4414
43704415 Constant *X = ConstantInt::get(getContext(),
43714416 APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
4372 return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
4417 return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
43734418 }
43744419 break;
43754420
99 ; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
1010 ; CHECK: Base offset: %A
1111 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes.
12 ; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
12 ; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<nw><%for.k>]
1313
1414 define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 {
1515 entry:
355355 exit:
356356 ret void
357357 }
358
359 ; Example where a mul should get the nsw flag, so that a sext can be
360 ; distributed over the mul.
361 define void @test-mul-nsw(float* %input, i32 %stride, i32 %numIterations) {
362 ; CHECK-LABEL: @test-mul-nsw
363 entry:
364 br label %loop
365 loop:
366 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
367
368 ; CHECK: %index32 =
369 ; CHECK: --> {0,+,%stride}
370 %index32 = mul nsw i32 %i, %stride
371
372 ; CHECK: %index64 =
373 ; CHECK: --> {0,+,(sext i32 %stride to i64)}
374 %index64 = sext i32 %index32 to i64
375
376 %ptr = getelementptr inbounds float, float* %input, i64 %index64
377 %nexti = add nsw i32 %i, 1
378 %f = load float, float* %ptr, align 4
379 %exitcond = icmp eq i32 %nexti, %numIterations
380 br i1 %exitcond, label %exit, label %loop
381 exit:
382 ret void
383 }
384
385 ; Example where a mul should get the nuw flag.
386 define void @test-mul-nuw(float* %input, i32 %stride, i32 %numIterations) {
387 ; CHECK-LABEL: @test-mul-nuw
388 entry:
389 br label %loop
390 loop:
391 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
392
393 ; CHECK: %index32 =
394 ; CHECK: --> {0,+,%stride}
395 %index32 = mul nuw i32 %i, %stride
396
397 %ptr = getelementptr inbounds float, float* %input, i32 %index32
398 %nexti = add nuw i32 %i, 1
399 %f = load float, float* %ptr, align 4
400 %exitcond = icmp eq i32 %nexti, %numIterations
401 br i1 %exitcond, label %exit, label %loop
402
403 exit:
404 ret void
405 }
406
407 ; Example where a shl should get the nsw flag, so that a sext can be
408 ; distributed over the shl.
409 define void @test-shl-nsw(float* %input, i32 %start, i32 %numIterations) {
410 ; CHECK-LABEL: @test-shl-nsw
411 entry:
412 br label %loop
413 loop:
414 %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
415
416 ; CHECK: %index32 =
417 ; CHECK: --> {(256 * %start),+,256}
418 %index32 = shl nsw i32 %i, 8
419
420 ; CHECK: %index64 =
421 ; CHECK: --> {(sext i32 (256 * %start) to i64),+,256}
422 %index64 = sext i32 %index32 to i64
423
424 %ptr = getelementptr inbounds float, float* %input, i64 %index64
425 %nexti = add nsw i32 %i, 1
426 %f = load float, float* %ptr, align 4
427 %exitcond = icmp eq i32 %nexti, %numIterations
428 br i1 %exitcond, label %exit, label %loop
429 exit:
430 ret void
431 }
432
433 ; Example where a shl should get the nuw flag.
434 define void @test-shl-nuw(float* %input, i32 %numIterations) {
435 ; CHECK-LABEL: @test-shl-nuw
436 entry:
437 br label %loop
438 loop:
439 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
440
441 ; CHECK: %index32 =
442 ; CHECK: --> {0,+,512}
443 %index32 = shl nuw i32 %i, 9
444
445 %ptr = getelementptr inbounds float, float* %input, i32 %index32
446 %nexti = add nuw i32 %i, 1
447 %f = load float, float* %ptr, align 4
448 %exitcond = icmp eq i32 %nexti, %numIterations
449 br i1 %exitcond, label %exit, label %loop
450
451 exit:
452 ret void
453 }
454
455 ; Example where a sub should *not* get the nsw flag, because of how
456 ; scalar evolution represents A - B as A + (-B) and -B can wrap even
457 ; in cases where A - B does not.
458 define void @test-sub-no-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
459 ; CHECK-LABEL: @test-sub-no-nsw
460 entry:
461 br label %loop
462 loop:
463 %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
464
465 ; CHECK: %index32 =
466 ; CHECK: --> {((-1 * %sub) + %start),+,1}
467 %index32 = sub nsw i32 %i, %sub
468 %index64 = sext i32 %index32 to i64
469
470 %ptr = getelementptr inbounds float, float* %input, i64 %index64
471 %nexti = add nsw i32 %i, 1
472 %f = load float, float* %ptr, align 4
473 %exitcond = icmp eq i32 %nexti, %numIterations
474 br i1 %exitcond, label %exit, label %loop
475 exit:
476 ret void
477 }
478
479 ; Example where a sub should get the nsw flag as the RHS cannot be the
480 ; minimal signed value.
481 define void @test-sub-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
482 ; CHECK-LABEL: @test-sub-nsw
483 entry:
484 %halfsub = ashr i32 %sub, 1
485 br label %loop
486 loop:
487 %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
488
489 ; CHECK: %index32 =
490 ; CHECK: --> {((-1 * %halfsub) + %start),+,1}
491 %index32 = sub nsw i32 %i, %halfsub
492 %index64 = sext i32 %index32 to i64
493
494 %ptr = getelementptr inbounds float, float* %input, i64 %index64
495 %nexti = add nsw i32 %i, 1
496 %f = load float, float* %ptr, align 4
497 %exitcond = icmp eq i32 %nexti, %numIterations
498 br i1 %exitcond, label %exit, label %loop
499 exit:
500 ret void
501 }
502
503 ; Example where a sub should get the nsw flag, since the LHS is non-negative,
504 ; which implies that the RHS cannot be the minimal signed value.
505 define void @test-sub-nsw-lhs-non-negative(float* %input, i32 %sub, i32 %numIterations) {
506 ; CHECK-LABEL: @test-sub-nsw-lhs-non-negative
507 entry:
508 br label %loop
509 loop:
510 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
511
512 ; CHECK: %index32 =
513 ; CHECK: --> {(-1 * %sub),+,1}
514 %index32 = sub nsw i32 %i, %sub
515
516 ; CHECK: %index64 =
517 ; CHECK: --> {(sext i32 (-1 * %sub) to i64),+,1}
518 %index64 = sext i32 %index32 to i64
519
520 %ptr = getelementptr inbounds float, float* %input, i64 %index64
521 %nexti = add nsw i32 %i, 1
522 %f = load float, float* %ptr, align 4
523 %exitcond = icmp eq i32 %nexti, %numIterations
524 br i1 %exitcond, label %exit, label %loop
525 exit:
526 ret void
527 }
528
529 ; Two adds with a sub in the middle and the sub should have nsw. There is
530 ; a special case for sequential adds/subs and this test covers that. We have to
531 ; put the final add first in the program since otherwise the special case
532 ; is not triggered, hence the strange basic block ordering.
533 define void @test-sub-with-add(float* %input, i32 %offset, i32 %numIterations) {
534 ; CHECK-LABEL: @test-sub-with-add
535 entry:
536 br label %loop
537 loop2:
538 ; CHECK: %seq =
539 ; CHECK: --> {(2 + (-1 * %offset)),+,1}
540 %seq = add nsw nuw i32 %index32, 1
541 %exitcond = icmp eq i32 %nexti, %numIterations
542 br i1 %exitcond, label %exit, label %loop
543
544 loop:
545 %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
546
547 %j = add nsw i32 %i, 1
548 ; CHECK: %index32 =
549 ; CHECK: --> {(1 + (-1 * %offset)),+,1}
550 %index32 = sub nsw i32 %j, %offset
551
552 %ptr = getelementptr inbounds float, float* %input, i32 %index32
553 %nexti = add nsw i32 %i, 1
554 store float 1.0, float* %ptr, align 4
555 br label %loop2
556 exit:
557 ret void
558 }
559
560
561 ; Subtraction of two recurrences. The addition in the SCEV that this
562 ; maps to is NSW, but the negation of the RHS does not since that
563 ; recurrence could be the most negative representable value.
564 define void @subrecurrences(i32 %outer_l, i32 %inner_l, i32 %val) {
565 ; CHECK-LABEL: @subrecurrences
566 entry:
567 br label %outer
568
569 outer:
570 %o_idx = phi i32 [ 0, %entry ], [ %o_idx.inc, %outer.be ]
571 %o_idx.inc = add nsw i32 %o_idx, 1
572 %cond = icmp eq i32 %o_idx, %val
573 br i1 %cond, label %inner, label %outer.be
574
575 inner:
576 %i_idx = phi i32 [ 0, %outer ], [ %i_idx.inc, %inner ]
577 %i_idx.inc = add nsw i32 %i_idx, 1
578 ; CHECK: %v =
579 ; CHECK-NEXT: --> {{[{][{]}}-1,+,-1}<%outer>,+,1}<%inner>
580 %v = sub nsw i32 %i_idx, %o_idx.inc
581 %forub = udiv i32 1, %v
582 %cond2 = icmp eq i32 %i_idx, %inner_l
583 br i1 %cond2, label %outer.be, label %inner
584
585 outer.be:
586 %cond3 = icmp eq i32 %o_idx, %outer_l
587 br i1 %cond3, label %exit, label %outer
588
589 exit:
590 ret void
591 }
3232 %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6
3333 ; min(N, i+3)
3434 ; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6
35 ; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64))))))
35 ; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64))))))
3636 %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9
3737 %tmp12 = load i32, i32* %tmp11, align 4
3838 %tmp13 = shl nsw i32 %tmp12, 1
77 ; instruction to the SCEV, preventing distributing sext into the
88 ; corresponding addrec.
99
10 ; Test this pattern:
11 ;
12 ; for (int i = 0; i < numIterations; ++i)
13 ; sum += ptr[i + offset];
14 ;
1015 define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
1116 ; CHECK-LABEL: @testadd
1217 ; CHECK: sext i32 %offset to i64
3338 exit:
3439 ret float %nextsum
3540 }
41
42 ; Test this pattern:
43 ;
44 ; for (int i = 0; i < numIterations; ++i)
45 ; sum += ptr[i - offset];
46 ;
47 define float @testsub(float* %input, i32 %offset, i32 %numIterations) {
48 ; CHECK-LABEL: @testsub
49 ; CHECK: sub i32 0, %offset
50 ; CHECK: sext i32
51 ; CHECK: loop:
52 ; CHECK-DAG: phi float*
53 ; CHECK-DAG: phi i32
54 ; CHECK-NOT: sext
55
56 entry:
57 br label %loop
58
59 loop:
60 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
61 %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
62 %index32 = sub nuw nsw i32 %i, %offset
63 %index64 = sext i32 %index32 to i64
64 %ptr = getelementptr inbounds float, float* %input, i64 %index64
65 %addend = load float, float* %ptr, align 4
66 %nextsum = fadd float %sum, %addend
67 %nexti = add nuw nsw i32 %i, 1
68 %exitcond = icmp eq i32 %nexti, %numIterations
69 br i1 %exitcond, label %exit, label %loop
70
71 exit:
72 ret float %nextsum
73 }
74
75 ; Test this pattern:
76 ;
77 ; for (int i = 0; i < numIterations; ++i)
78 ; sum += ptr[i * stride];
79 ;
80 define float @testmul(float* %input, i32 %stride, i32 %numIterations) {
81 ; CHECK-LABEL: @testmul
82 ; CHECK: sext i32 %stride to i64
83 ; CHECK: loop:
84 ; CHECK-DAG: phi float*
85 ; CHECK-DAG: phi i32
86 ; CHECK-NOT: sext
87
88 entry:
89 br label %loop
90
91 loop:
92 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
93 %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
94 %index32 = mul nuw nsw i32 %i, %stride
95 %index64 = sext i32 %index32 to i64
96 %ptr = getelementptr inbounds float, float* %input, i64 %index64
97 %addend = load float, float* %ptr, align 4
98 %nextsum = fadd float %sum, %addend
99 %nexti = add nuw nsw i32 %i, 1
100 %exitcond = icmp eq i32 %nexti, %numIterations
101 br i1 %exitcond, label %exit, label %loop
102
103 exit:
104 ret float %nextsum
105 }
106
107 ; Test this pattern:
108 ;
109 ; for (int i = 0; i < numIterations; ++i)
110 ; sum += ptr[3 * (i << 7)];
111 ;
112 ; The multiplication by 3 is to make the address calculation expensive
113 ; enough to force the introduction of a pointer induction variable.
114 define float @testshl(float* %input, i32 %numIterations) {
115 ; CHECK-LABEL: @testshl
116 ; CHECK: loop:
117 ; CHECK-DAG: phi float*
118 ; CHECK-DAG: phi i32
119 ; CHECK-NOT: sext
120
121 entry:
122 br label %loop
123
124 loop:
125 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
126 %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
127 %index32 = shl nuw nsw i32 %i, 7
128 %index32mul = mul nuw nsw i32 %index32, 3
129 %index64 = sext i32 %index32mul to i64
130 %ptr = getelementptr inbounds float, float* %input, i64 %index64
131 %addend = load float, float* %ptr, align 4
132 %nextsum = fadd float %sum, %addend
133 %nexti = add nuw nsw i32 %i, 1
134 %exitcond = icmp eq i32 %nexti, %numIterations
135 br i1 %exitcond, label %exit, label %loop
136
137 exit:
138 ret float %nextsum
139 }