llvm.org GIT mirror llvm / 7fe3ccf
[LoopPred] Hoist the 'and' of predicated checks where legal. If we have multiple range checks which can be predicated, hoist the 'and' of the results outside the loop. This slightly cleans up the resulting IR, but the main motivation is to provide a building block for D60093. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358419 91177308-0d34-0410-b5e6-96231b3b80d8 (Philip Reames, 5 months ago)
3 changed file(s) with 27 addition(s) and 13 deletion(s). Raw diff Collapse all Expand all
263263
264264 Optional parseLoopLatchICmp();
265265
266 /// Return an insertion point suitable for inserting a safe to speculate
267 /// instruction whose only user will be 'User' which has operands 'Ops'. A
268 /// trivial result would be the User itself, but we try to return a
269 /// loop invariant location if possible.
270 Instruction *findInsertPt(Instruction *User, ArrayRef Ops);
271
266272 bool CanExpand(const SCEV* S);
267273 Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
268274 ICmpInst::Predicate Pred, const SCEV *LHS,
437443 return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
438444 }
439445
// Choose the insertion point for a new instruction that is built from 'Ops'
// and consumed by 'Use'.
//
// Returns 'Use' as soon as any operand in 'Ops' is reported as not loop
// invariant by L; otherwise returns the terminator of Preheader.
//
// NOTE(review): the ArrayRef element type appears to have been stripped from
// this listing; the loop below iterates the operands as Value* -- confirm
// against the real source. The first parameter is spelled 'Use' here while
// the comment on the declaration refers to it as 'User'.
446 Instruction *LoopPredication::findInsertPt(Instruction *Use,
447 ArrayRef Ops) {
448 for (Value *Op : Ops)
// A single operand that is not loop invariant pins the result to 'Use'.
449 if (!L->isLoopInvariant(Op))
450 return Use;
// Every operand was loop invariant (this is also reached when Ops is empty).
451 return Preheader->getTerminator();
452 }
453
// Returns true only when both conditions hold for S: SE reports it as loop
// invariant with respect to L, and isSafeToExpand(S, *SE) is true. The
// invariance test is evaluated first; && short-circuits if it fails.
440454 bool LoopPredication::CanExpand(const SCEV* S) {
441455 return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
442456 }
651665 TotalWidened += NumWidened;
652666
653667 // Emit the new guard condition
654 Builder.SetInsertPoint(Guard);
668 Builder.SetInsertPoint(findInsertPt(Guard, Checks));
655669 Value *LastCheck = nullptr;
656670 for (auto *Check : Checks)
657671 if (!LastCheck)
683697 TotalWidened += NumWidened;
684698
685699 // Emit the new guard condition
686 Builder.SetInsertPoint(BI);
700 Builder.SetInsertPoint(findInsertPt(BI, Checks));
687701 Value *LastCheck = nullptr;
688702 for (auto *Check : Checks)
689703 if (!LastCheck)
975975 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i32 [[N]], [[LENGTH_1:%.*]]
976976 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 0, [[LENGTH_1]]
977977 ; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP3]]
978 ; CHECK-NEXT: br label [[LOOP:%.*]]
979 ; CHECK: loop:
980 ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
981 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
982978 ; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[TMP5]]
979 ; CHECK-NEXT: br label [[LOOP:%.*]]
980 ; CHECK: loop:
981 ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
982 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
983983 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP6]], i32 9) [ "deopt"() ]
984984 ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
985985 ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]]
10471047 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i32 [[N]], [[LENGTH_1:%.*]]
10481048 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 0, [[LENGTH_1]]
10491049 ; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[TMP6]]
1050 ; CHECK-NEXT: br label [[LOOP:%.*]]
1051 ; CHECK: loop:
1052 ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
1053 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
10541050 ; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP2]], [[TMP5]]
10551051 ; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], [[TMP8]]
1052 ; CHECK-NEXT: br label [[LOOP:%.*]]
1053 ; CHECK: loop:
1054 ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
1055 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
10561056 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP10]], i32 9) [ "deopt"() ]
10571057 ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
10581058 ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]]
1919 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i32 16, [[TMP4]]
2020 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[OFFA]], [[LENGTHA]]
2121 ; CHECK-NEXT: [[TMP7:%.*]] = and i1 [[TMP6]], [[TMP5]]
22 ; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP3]], [[TMP7]]
2223 ; CHECK-NEXT: br label [[LOOP:%.*]]
2324 ; CHECK: loop:
2425 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
2627 ; CHECK-NEXT: [[IV_TRUNC_16:%.*]] = trunc i64 [[IV]] to i16
2728 ; CHECK-NEXT: [[INDEXA:%.*]] = add i32 [[IV_TRUNC_32]], [[OFFA]]
2829 ; CHECK-NEXT: [[INDEXB:%.*]] = add i16 [[IV_TRUNC_16]], [[OFFB]]
29 ; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP3]], [[TMP7]]
3030 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP8]], i32 9) [ "deopt"() ]
3131 ; CHECK-NEXT: [[INDEXA_EXT:%.*]] = zext i32 [[INDEXA]] to i64
3232 ; CHECK-NEXT: [[ADDRA:%.*]] = getelementptr inbounds i8, i8* [[ARRA]], i64 [[INDEXA_EXT]]
9292 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule i32 15, [[TMP10]]
9393 ; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i32 [[OFFA]], [[LENGTHA]]
9494 ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP12]], [[TMP11]]
95 ; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP4]], [[TMP8]]
96 ; CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP14]], [[TMP13]]
9597 ; CHECK-NEXT: br label [[LOOP:%.*]]
9698 ; CHECK: loop:
9799 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
98100 ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
99101 ; CHECK-NEXT: [[INDEXA:%.*]] = add i32 [[IV_TRUNC]], [[OFFA]]
100102 ; CHECK-NEXT: [[INDEXB:%.*]] = add i32 [[IV_TRUNC]], [[OFFB]]
101 ; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP4]], [[TMP8]]
102 ; CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP14]], [[TMP13]]
103103 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP15]], i32 9) [ "deopt"() ]
104104 ; CHECK-NEXT: [[INDEXA_EXT:%.*]] = zext i32 [[INDEXA]] to i64
105105 ; CHECK-NEXT: [[ADDRA:%.*]] = getelementptr inbounds i8, i8* [[ARRA]], i64 [[INDEXA_EXT]]