llvm.org GIT mirror llvm / 9ee5a28
Loop unroller: set thresholds for optsize and minsize functions to zero. Before r268509, Clang would disable the loop unroll pass when optimizing for size. That commit enabled the pass so that unroll pragmas could be supported in -Os builds. However, this regressed the binary size of one of Chromium's DLLs by ~100 KB. This commit restores the original behaviour of no unrolling at -Os, but doing it in LLVM instead of Clang makes more sense, and also allows the pragmas to keep working. Differential revision: http://reviews.llvm.org/D20115 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269124 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 3 years ago
6 changed file(s) with 38 addition(s) and 172 deletion(s). Raw diff Collapse all Expand all
110110 UP.Threshold = 150;
111111 UP.PercentDynamicCostSavedThreshold = 20;
112112 UP.DynamicCostSavingsDiscount = 2000;
113 UP.OptSizeThreshold = 50;
113 UP.OptSizeThreshold = 0;
114114 UP.PartialThreshold = UP.Threshold;
115 UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
115 UP.PartialOptSizeThreshold = 0;
116116 UP.Count = 0;
117117 UP.MaxCount = UINT_MAX;
118118 UP.FullUnrollMaxCount = UINT_MAX;
0 ; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s -check-prefix=EPILOG
11 ; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
2 define void @unroll_opt_for_size() nounwind optsize {
3 entry:
4 br label %loop
5
6 loop:
7 %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
8 %inc = add i32 %iv, 1
9 %exitcnd = icmp uge i32 %inc, 1024
10 br i1 %exitcnd, label %exit, label %loop
11
12 exit:
13 ret void
14 }
15
16 ; EPILOG-LABEL: @unroll_opt_for_size
17 ; EPILOG: add
18 ; EPILOG-NEXT: add
19 ; EPILOG-NEXT: add
20 ; EPILOG: icmp
21
22 ; PROLOG-LABEL: @unroll_opt_for_size
23 ; PROLOG: add
24 ; PROLOG-NEXT: add
25 ; PROLOG-NEXT: add
26 ; PROLOG: icmp
272
283 define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
294 entry:
0 ; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s
1 define void @unroll_opt_for_size() nounwind optsize {
2 entry:
3 br label %loop
4
5 loop:
6 %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
7 %inc = add i32 %iv, 1
8 %exitcnd = icmp uge i32 %inc, 1024
9 br i1 %exitcnd, label %exit, label %loop
10
11 exit:
12 ret void
13 }
14
15 ; CHECK-LABEL: @unroll_opt_for_size
16 ; CHECK: add
17 ; CHECK-NEXT: add
18 ; CHECK-NEXT: add
19 ; CHECK-NEXT: add
20 ; CHECK-NEXT: add
21 ; CHECK-NEXT: add
22 ; CHECK-NEXT: add
23 ; CHECK-NEXT: add
24 ; CHECK-NEXT: add
25 ; CHECK-NEXT: add
26 ; CHECK-NEXT: add
27 ; CHECK-NEXT: add
28 ; CHECK-NEXT: add
29 ; CHECK-NEXT: add
30 ; CHECK-NEXT: add
31 ; CHECK-NEXT: add
32 ; CHECK-NEXT: add
33 ; CHECK-NEXT: add
34 ; CHECK-NEXT: add
35 ; CHECK-NEXT: add
36 ; CHECK-NEXT: add
37 ; CHECK-NEXT: add
38 ; CHECK-NEXT: add
39 ; CHECK-NEXT: add
40 ; CHECK-NEXT: add
41 ; CHECK-NEXT: add
42 ; CHECK-NEXT: add
43 ; CHECK-NEXT: add
44 ; CHECK-NEXT: add
45 ; CHECK-NEXT: add
46 ; CHECK-NEXT: add
47 ; CHECK-NEXT: add
48 ; CHECK-NEXT: icmp
49
501 define void @unroll_default() nounwind {
512 entry:
523 br label %loop
+0
-53
test/Transforms/LoopUnroll/partial-unroll-optsize.ll less more
None ; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
1 ; RUN: sed -e 's/optsize/minsize/' %s | opt -S -loop-unroll -unroll-allow-partial | FileCheck %s
2
3 ; Loop size = 3. When the function has the optsize attribute, the
4 ; OptSizeUnrollThreshold, i.e. 50, is used; hence the loop should be unrolled
5 ; 32 times, because (1 * 32) + 2 < 50 (whereas (1 * 64) + 2 is not).
6 define void @unroll_opt_for_size() nounwind optsize {
7 entry:
8 br label %loop
9
10 loop:
11 %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
12 %inc = add i32 %iv, 1
13 %exitcnd = icmp uge i32 %inc, 1024
14 br i1 %exitcnd, label %exit, label %loop
15
16 exit:
17 ret void
18 }
19
20 ; CHECK: add
21 ; CHECK-NEXT: add
22 ; CHECK-NEXT: add
23 ; CHECK-NEXT: add
24 ; CHECK-NEXT: add
25 ; CHECK-NEXT: add
26 ; CHECK-NEXT: add
27 ; CHECK-NEXT: add
28 ; CHECK-NEXT: add
29 ; CHECK-NEXT: add
30 ; CHECK-NEXT: add
31 ; CHECK-NEXT: add
32 ; CHECK-NEXT: add
33 ; CHECK-NEXT: add
34 ; CHECK-NEXT: add
35 ; CHECK-NEXT: add
36 ; CHECK-NEXT: add
37 ; CHECK-NEXT: add
38 ; CHECK-NEXT: add
39 ; CHECK-NEXT: add
40 ; CHECK-NEXT: add
41 ; CHECK-NEXT: add
42 ; CHECK-NEXT: add
43 ; CHECK-NEXT: add
44 ; CHECK-NEXT: add
45 ; CHECK-NEXT: add
46 ; CHECK-NEXT: add
47 ; CHECK-NEXT: add
48 ; CHECK-NEXT: add
49 ; CHECK-NEXT: add
50 ; CHECK-NEXT: add
51 ; CHECK-NEXT: add
52 ; CHECK-NEXT: icmp
33
44 ;///////////////////// TEST 1 //////////////////////////////
55
6 ; This test shows that with optsize attribute, the loop is unrolled
7 ; according to the specified unroll factor.
6 ; This test shows that the loop is unrolled according to the specified
7 ; unroll factor.
88
9 define void @Test1() nounwind optsize {
9 define void @Test1() nounwind {
1010 entry:
1111 br label %loop
1212
3131
3232 ;///////////////////// TEST 2 //////////////////////////////
3333
34 ; This test shows that with minsize attribute, the loop is unrolled
35 ; according to the specified unroll factor.
34 ; This test shows that with optnone attribute, the loop is not unrolled
35 ; even if an unroll factor was specified.
3636
37 define void @Test2() nounwind minsize {
37 define void @Test2() nounwind optnone noinline {
3838 entry:
3939 br label %loop
4040
5151 ; CHECK_COUNT4-LABEL: @Test2
5252 ; CHECK_COUNT4: phi
5353 ; CHECK_COUNT4-NEXT: add
54 ; CHECK_COUNT4-NEXT: add
55 ; CHECK_COUNT4-NEXT: add
56 ; CHECK_COUNT4-NEXT: add
5754 ; CHECK_COUNT4-NEXT: icmp
5855
5956
6057 ;///////////////////// TEST 3 //////////////////////////////
6158
62 ; This test shows that with optnone attribute, the loop is not unrolled
63 ; even if an unroll factor was specified.
64
65 define void @Test3() nounwind optnone noinline {
66 entry:
67 br label %loop
68
69 loop:
70 %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
71 %inc = add i32 %iv, 1
72 %exitcnd = icmp uge i32 %inc, 1024
73 br i1 %exitcnd, label %exit, label %loop
74
75 exit:
76 ret void
77 }
78
79 ; CHECK_COUNT4-LABEL: @Test3
80 ; CHECK_COUNT4: phi
81 ; CHECK_COUNT4-NEXT: add
82 ; CHECK_COUNT4-NEXT: icmp
83
84
85 ;///////////////////// TEST 4 //////////////////////////////
86
87 ; This test shows that without any attribute, this loop is fully unrolled
88 ; by default.
59 ; This test shows that this loop is fully unrolled by default.
8960
9061 @tab = common global [24 x i32] zeroinitializer, align 4
9162
92 define i32 @Test4() {
63 define i32 @Test3() {
9364 entry:
9465 br label %for.body
9566
10576 ret i32 42
10677 }
10778
108 ; CHECK_NOCOUNT-LABEL: @Test4
79 ; CHECK_NOCOUNT-LABEL: @Test3
10980 ; CHECK_NOCOUNT: store
11081 ; CHECK_NOCOUNT-NEXT: store
11182 ; CHECK_NOCOUNT-NEXT: store
133104 ; CHECK_NOCOUNT-NEXT: ret
134105
135106
136 ;///////////////////// TEST 5 //////////////////////////////
107 ;///////////////////// TEST 4 //////////////////////////////
137108
138 ; This test shows that with optsize attribute, this loop is not unrolled
139 ; by default.
109 ; This test shows that with optsize attribute, this loop is not unrolled.
140110
141 define i32 @Test5() optsize {
111 define i32 @Test4() optsize {
142112 entry:
143113 br label %for.body
144114
154124 ret i32 42
155125 }
156126
157 ; CHECK_NOCOUNT-LABEL: @Test5
127 ; CHECK_NOCOUNT-LABEL: @Test4
158128 ; CHECK_NOCOUNT: phi
159129 ; CHECK_NOCOUNT: icmp
107107 !3 = !{!3, !4}
108108 !4 = !{!"llvm.loop.unroll.full"}
109109
110 ; #pragma clang loop unroll(full)
111 ; Loop should be fully unrolled, even for optsize.
112 ;
113 ; CHECK-LABEL: @loop64_with_full_optsize(
114 ; CHECK-NOT: br i1
115 define void @loop64_with_full_optsize(i32* nocapture %a) optsize {
116 entry:
117 br label %for.body
118
119 for.body: ; preds = %for.body, %entry
120 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
121 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
122 %0 = load i32, i32* %arrayidx, align 4
123 %inc = add nsw i32 %0, 1
124 store i32 %inc, i32* %arrayidx, align 4
125 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
126 %exitcond = icmp eq i64 %indvars.iv.next, 64
127 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
128
129 for.end: ; preds = %for.body
130 ret void
131 }
132
110133 ; #pragma clang loop unroll_count(4)
111134 ; Loop should be unrolled 4 times.
112135 ;