llvm.org GIT mirror llvm / ac70cea
Generalize the cast-of-addrec folding to handle folding of SCEVs like (sext i8 {-128,+,1} to i64) to i64 {-128,+,1}, where the iteration crosses from negative to positive, but is still safe if the trip count is within range. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@70421 91177308-0d34-0410-b5e6-96231b3b80d8 Dan Gohman 11 years ago
3 changed file(s) with 164 addition(s) and 46 deletion(s). Raw diff Collapse all Expand all
717717 SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
718718 if (!isa<SCEVCouldNotCompute>(BECount)) {
719719 // Manually compute the final value for AR, checking for
720 // overflow at each step.
720 // overflow.
721721 SCEVHandle Start = AR->getStart();
722722 SCEVHandle Step = AR->getStepRecurrence(*this);
723723
729729 getTruncateOrZeroExtend(CastedBECount, BECount->getType())) {
730730 const Type *WideTy =
731731 IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
732 // Check whether Start+Step*BECount has no unsigned overflow.
732733 SCEVHandle ZMul =
733734 getMulExpr(CastedBECount,
734735 getTruncateOrZeroExtend(Step, Start->getType()));
735 // Check whether Start+Step*BECount has no unsigned overflow.
736 if (getZeroExtendExpr(ZMul, WideTy) ==
737 getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
738 getZeroExtendExpr(Step, WideTy))) {
739 SCEVHandle Add = getAddExpr(Start, ZMul);
740 if (getZeroExtendExpr(Add, WideTy) ==
741 getAddExpr(getZeroExtendExpr(Start, WideTy),
742 getZeroExtendExpr(ZMul, WideTy)))
743 // Return the expression with the addrec on the outside.
744 return getAddRecExpr(getZeroExtendExpr(Start, Ty),
745 getZeroExtendExpr(Step, Ty),
746 AR->getLoop());
747 }
736 SCEVHandle Add = getAddExpr(Start, ZMul);
737 if (getZeroExtendExpr(Add, WideTy) ==
738 getAddExpr(getZeroExtendExpr(Start, WideTy),
739 getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
740 getZeroExtendExpr(Step, WideTy))))
741 // Return the expression with the addrec on the outside.
742 return getAddRecExpr(getZeroExtendExpr(Start, Ty),
743 getZeroExtendExpr(Step, Ty),
744 AR->getLoop());
748745
749746 // Similar to above, only this time treat the step value as signed.
750747 // This covers loops that count down.
751748 SCEVHandle SMul =
752749 getMulExpr(CastedBECount,
753750 getTruncateOrSignExtend(Step, Start->getType()));
754 // Check whether Start+Step*BECount has no unsigned overflow.
755 if (getSignExtendExpr(SMul, WideTy) ==
756 getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
757 getSignExtendExpr(Step, WideTy))) {
758 SCEVHandle Add = getAddExpr(Start, SMul);
759 if (getZeroExtendExpr(Add, WideTy) ==
760 getAddExpr(getZeroExtendExpr(Start, WideTy),
761 getSignExtendExpr(SMul, WideTy)))
762 // Return the expression with the addrec on the outside.
763 return getAddRecExpr(getZeroExtendExpr(Start, Ty),
764 getSignExtendExpr(Step, Ty),
765 AR->getLoop());
766 }
751 Add = getAddExpr(Start, SMul);
752 if (getZeroExtendExpr(Add, WideTy) ==
753 getAddExpr(getZeroExtendExpr(Start, WideTy),
754 getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
755 getSignExtendExpr(Step, WideTy))))
756 // Return the expression with the addrec on the outside.
757 return getAddRecExpr(getZeroExtendExpr(Start, Ty),
758 getSignExtendExpr(Step, Ty),
759 AR->getLoop());
767760 }
768761 }
769762 }
806799 SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
807800 if (!isa<SCEVCouldNotCompute>(BECount)) {
808801 // Manually compute the final value for AR, checking for
809 // overflow at each step.
802 // overflow.
810803 SCEVHandle Start = AR->getStart();
811804 SCEVHandle Step = AR->getStepRecurrence(*this);
812805
813806 // Check whether the backedge-taken count can be losslessly cast to
814 // the addrec's type. The count needs to be the same whether sign
815 // extended or zero extended.
807 // the addrec's type. The count is always unsigned.
816808 SCEVHandle CastedBECount =
817809 getTruncateOrZeroExtend(BECount, Start->getType());
818810 if (BECount ==
819 getTruncateOrZeroExtend(CastedBECount, BECount->getType()) &&
820 BECount ==
821 getTruncateOrSignExtend(CastedBECount, BECount->getType())) {
811 getTruncateOrZeroExtend(CastedBECount, BECount->getType())) {
822812 const Type *WideTy =
823813 IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
814 // Check whether Start+Step*BECount has no signed overflow.
824815 SCEVHandle SMul =
825816 getMulExpr(CastedBECount,
826817 getTruncateOrSignExtend(Step, Start->getType()));
827 // Check whether Start+Step*BECount has no signed overflow.
828 if (getSignExtendExpr(SMul, WideTy) ==
829 getMulExpr(getSignExtendExpr(CastedBECount, WideTy),
830 getSignExtendExpr(Step, WideTy))) {
831 SCEVHandle Add = getAddExpr(Start, SMul);
832 if (getSignExtendExpr(Add, WideTy) ==
833 getAddExpr(getSignExtendExpr(Start, WideTy),
834 getSignExtendExpr(SMul, WideTy)))
835 // Return the expression with the addrec on the outside.
836 return getAddRecExpr(getSignExtendExpr(Start, Ty),
837 getSignExtendExpr(Step, Ty),
838 AR->getLoop());
839 }
818 SCEVHandle Add = getAddExpr(Start, SMul);
819 if (getSignExtendExpr(Add, WideTy) ==
820 getAddExpr(getSignExtendExpr(Start, WideTy),
821 getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
822 getSignExtendExpr(Step, WideTy))))
823 // Return the expression with the addrec on the outside.
824 return getAddRecExpr(getSignExtendExpr(Start, Ty),
825 getSignExtendExpr(Step, Ty),
826 AR->getLoop());
840827 }
841828 }
842829 }
0 ; RUN: llvm-as < %s | opt -disable-output -scalar-evolution -analyze \
1 ; RUN: | grep { --> \{-128,+,1\} Exits: 127} | count 5
2
3 ; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the
4 ; trip count is within range where this is safe.
5
6 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
7 target triple = "x86_64-unknown-linux-gnu"
8
9 define void @foo(double* nocapture %x) nounwind { ; in-range case: the induction variable takes the values -128..127 in steps of 1
10 bb1.thread:
11 br label %bb1
12
13 bb1: ; preds = %bb1, %bb1.thread
14 %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] induction variable: -128 on entry, %8 on the backedge
15 %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] i8 holds -128..127, exactly the values the IV takes
16 %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] i9 holds -256..255, wider than the values the IV takes
17 %2 = sext i9 %1 to i64 ; [#uses=1] sign-extends the i9 copy back to i64 (index for the load)
18 %3 = getelementptr double* %x, i64 %2 ; [#uses=1]
19 %4 = load double* %3, align 8 ; [#uses=1]
20 %5 = mul double %4, 3.900000e+00 ; [#uses=1]
21 %6 = sext i8 %0 to i64 ; [#uses=1] sign-extends the i8 copy back to i64 (index for the store)
22 %7 = getelementptr double* %x, i64 %6 ; [#uses=1]
23 store double %5, double* %7, align 8
24 %8 = add i64 %i.0.reg2mem.0, 1 ; [#uses=2] next IV value (step +1)
25 %9 = icmp sgt i64 %8, 127 ; [#uses=1] leave the loop once the next value would exceed 127
26 br i1 %9, label %return, label %bb1
27
28 return: ; preds = %bb1
29 ret void
30 }
0 ; RUN: llvm-as < %s | opt -disable-output -scalar-evolution -analyze \
1 ; RUN: | grep { --> (sext i. \{.\*,+,.\*\} to i64)} | count 5
2
3 ; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases
4 ; where the trip count is not within range.
5
6 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
7 target triple = "x86_64-unknown-linux-gnu"
8
9 define void @foo0(double* nocapture %x) nounwind { ; out-of-range case: the IV runs -128..127 but is narrowed to i7
10 bb1.thread:
11 br label %bb1
12
13 bb1: ; preds = %bb1, %bb1.thread
14 %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] induction variable: -128 on entry, %8 on the backedge
15 %0 = trunc i64 %i.0.reg2mem.0 to i7 ; [#uses=1] i7 holds only -64..63, so the narrowed value wraps during the loop
16 %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] i9 holds -256..255, wider than the values the IV takes
17 %2 = sext i9 %1 to i64 ; [#uses=1]
18 %3 = getelementptr double* %x, i64 %2 ; [#uses=1]
19 %4 = load double* %3, align 8 ; [#uses=1]
20 %5 = mul double %4, 3.900000e+00 ; [#uses=1]
21 %6 = sext i7 %0 to i64 ; [#uses=1] sign-extends the wrapping i7 copy back to i64
22 %7 = getelementptr double* %x, i64 %6 ; [#uses=1]
23 store double %5, double* %7, align 8
24 %8 = add i64 %i.0.reg2mem.0, 1 ; [#uses=2] next IV value (step +1)
25 %9 = icmp sgt i64 %8, 127 ; [#uses=1] leave the loop once the next value would exceed 127
26 br i1 %9, label %return, label %bb1
27
28 return: ; preds = %bb1
29 ret void
30 }
31
32 define void @foo1(double* nocapture %x) nounwind { ; out-of-range case: the exit bound lets the IV reach 128
33 bb1.thread:
34 br label %bb1
35
36 bb1: ; preds = %bb1, %bb1.thread
37 %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] induction variable: -128 on entry, %8 on the backedge
38 %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] i8 holds -128..127; the IV's final value of 128 does not fit
39 %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] i9 holds -256..255, wider than the values the IV takes
40 %2 = sext i9 %1 to i64 ; [#uses=1]
41 %3 = getelementptr double* %x, i64 %2 ; [#uses=1]
42 %4 = load double* %3, align 8 ; [#uses=1]
43 %5 = mul double %4, 3.900000e+00 ; [#uses=1]
44 %6 = sext i8 %0 to i64 ; [#uses=1] sign-extends the i8 copy back to i64
45 %7 = getelementptr double* %x, i64 %6 ; [#uses=1]
46 store double %5, double* %7, align 8
47 %8 = add i64 %i.0.reg2mem.0, 1 ; [#uses=2] next IV value (step +1)
48 %9 = icmp sgt i64 %8, 128 ; [#uses=1] leave the loop only once the next value would exceed 128
49 br i1 %9, label %return, label %bb1
50
51 return: ; preds = %bb1
52 ret void
53 }
54
55 define void @foo2(double* nocapture %x) nounwind { ; out-of-range case: the IV starts at -129
56 bb1.thread:
57 br label %bb1
58
59 bb1: ; preds = %bb1, %bb1.thread
60 %i.0.reg2mem.0 = phi i64 [ -129, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] induction variable: -129 on entry, %8 on the backedge
61 %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] i8 holds -128..127; the starting value -129 does not fit
62 %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] i9 holds -256..255, wider than the values the IV takes
63 %2 = sext i9 %1 to i64 ; [#uses=1]
64 %3 = getelementptr double* %x, i64 %2 ; [#uses=1]
65 %4 = load double* %3, align 8 ; [#uses=1]
66 %5 = mul double %4, 3.900000e+00 ; [#uses=1]
67 %6 = sext i8 %0 to i64 ; [#uses=1] sign-extends the i8 copy back to i64
68 %7 = getelementptr double* %x, i64 %6 ; [#uses=1]
69 store double %5, double* %7, align 8
70 %8 = add i64 %i.0.reg2mem.0, 1 ; [#uses=2] next IV value (step +1)
71 %9 = icmp sgt i64 %8, 127 ; [#uses=1] leave the loop once the next value would exceed 127
72 br i1 %9, label %return, label %bb1
73
74 return: ; preds = %bb1
75 ret void
76 }
77
78 define void @foo3(double* nocapture %x) nounwind { ; out-of-range case: the IV counts down from -128
79 bb1.thread:
80 br label %bb1
81
82 bb1: ; preds = %bb1, %bb1.thread
83 %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] induction variable: -128 on entry, %8 on the backedge
84 %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] i8 holds -128..127; the IV is already -129 after the first backedge
85 %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1]
86 %2 = sext i9 %1 to i64 ; [#uses=1]
87 %3 = getelementptr double* %x, i64 %2 ; [#uses=1]
88 %4 = load double* %3, align 8 ; [#uses=1]
89 %5 = mul double %4, 3.900000e+00 ; [#uses=1]
90 %6 = sext i8 %0 to i64 ; [#uses=1] sign-extends the i8 copy back to i64
91 %7 = getelementptr double* %x, i64 %6 ; [#uses=1]
92 store double %5, double* %7, align 8
93 %8 = add i64 %i.0.reg2mem.0, -1 ; [#uses=2] next IV value (step -1)
94 %9 = icmp sgt i64 %8, 127 ; [#uses=1] exit test is still "next value > 127", which a decreasing IV does not reach by counting down
95 br i1 %9, label %return, label %bb1
96
97 return: ; preds = %bb1
98 ret void
99 }