llvm.org GIT mirror llvm / 9e495c5
R600: Make cttz / ctlz cheap to speculate. Speculating things is generally good. SI+ has instructions for these for 32-bit values. This is still probably better even with the expansion for 64-bit values, although it is odd that this callback doesn't have the size as a parameter. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225822 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 5 years ago
3 changed file(s) with 239 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
468468 return ((LScalarSize <= CastScalarSize) ||
469469 (CastScalarSize >= 32) ||
470470 (LScalarSize < 32));
471 }
472
473 // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
474 // profitable with the expansion for 64-bit since it's generally good to
475 // speculate things.
476 // FIXME: These should really have the size as a parameter.
477 bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const { // Hook override; it receives no type or size argument.
478 return true; // Unconditionally reports cttz as cheap to speculate.
479 }
480
481 bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const { // Hook override; it receives no type or size argument.
482 return true; // Unconditionally reports ctlz as cheap to speculate.
471483 }
472484
473485 //===---------------------------------------------------------------------===//
128128 EVT ExtVT) const override;
129129
130130 bool isLoadBitCastBeneficial(EVT, EVT) const override;
131 bool isCheapToSpeculateCttz() const override; // Speculation hook for cttz; takes no arguments.
132 bool isCheapToSpeculateCtlz() const override; // Speculation hook for ctlz; takes no arguments.
133
131134 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
132135 bool isVarArg,
133136 const SmallVectorImpl &Outs,
0 ; RUN: opt -S -codegenprepare -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
1
2
3 define i64 @test1(i64 %A) { ; i64 ctlz guarded by a zero test; the checks expect one ctlz call with i1 false, returned directly
4 ; ALL-LABEL: @test1(
5 ; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
6 ; SI-NEXT: ret i64 [[CTLZ]]
7 entry:
8 %tobool = icmp eq i64 %A, 0
9 br i1 %tobool, label %cond.end, label %cond.true ; bypass the ctlz call when %A is zero
10
11 cond.true: ; preds = %entry
12 %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
13 br label %cond.end
14
15 cond.end: ; preds = %entry, %cond.true
16 %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ] ; the zero path yields 64, the bit width of i64
17 ret i64 %cond
18 }
19
20
21 define i32 @test2(i32 %A) { ; i32 ctlz guarded by a zero test; the checks expect one ctlz call with i1 false, returned directly
22 ; ALL-LABEL: @test2(
23 ; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i32 @llvm.ctlz.i32(i32 %A, i1 false)
24 ; SI-NEXT: ret i32 [[CTLZ]]
25 entry:
26 %tobool = icmp eq i32 %A, 0
27 br i1 %tobool, label %cond.end, label %cond.true ; bypass the ctlz call when %A is zero
28
29 cond.true: ; preds = %entry
30 %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
31 br label %cond.end
32
33 cond.end: ; preds = %entry, %cond.true
34 %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ] ; the zero path yields 32, the bit width of i32
35 ret i32 %cond
36 }
37
38
39 define signext i16 @test3(i16 signext %A) { ; i16 ctlz guarded by a zero test; the checks expect one ctlz call with i1 false, returned directly
40 ; ALL-LABEL: @test3(
41 ; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i16 @llvm.ctlz.i16(i16 %A, i1 false)
42 ; SI-NEXT: ret i16 [[CTLZ]]
43 entry:
44 %tobool = icmp eq i16 %A, 0
45 br i1 %tobool, label %cond.end, label %cond.true ; bypass the ctlz call when %A is zero
46
47 cond.true: ; preds = %entry
48 %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
49 br label %cond.end
50
51 cond.end: ; preds = %entry, %cond.true
52 %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ] ; the zero path yields 16, the bit width of i16
53 ret i16 %cond
54 }
55
56
57 define i64 @test1b(i64 %A) { ; i64 cttz guarded by a zero test; the checks expect one cttz call with i1 false, returned directly
58 ; ALL-LABEL: @test1b(
59 ; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %A, i1 false)
60 ; SI-NEXT: ret i64 [[CTTZ]]
61 entry:
62 %tobool = icmp eq i64 %A, 0
63 br i1 %tobool, label %cond.end, label %cond.true ; bypass the cttz call when %A is zero
64
65 cond.true: ; preds = %entry
66 %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
67 br label %cond.end
68
69 cond.end: ; preds = %entry, %cond.true
70 %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ] ; the zero path yields 64, the bit width of i64
71 ret i64 %cond
72 }
73
74
75 define i32 @test2b(i32 %A) { ; i32 cttz guarded by a zero test; the checks expect one cttz call with i1 false, returned directly
76 ; ALL-LABEL: @test2b(
77 ; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %A, i1 false)
78 ; SI-NEXT: ret i32 [[CTTZ]]
79 entry:
80 %tobool = icmp eq i32 %A, 0
81 br i1 %tobool, label %cond.end, label %cond.true ; bypass the cttz call when %A is zero
82
83 cond.true: ; preds = %entry
84 %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
85 br label %cond.end
86
87 cond.end: ; preds = %entry, %cond.true
88 %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ] ; the zero path yields 32, the bit width of i32
89 ret i32 %cond
90 }
91
92
93 define signext i16 @test3b(i16 signext %A) { ; i16 cttz guarded by a zero test; the checks expect one cttz call with i1 false, returned directly
94 ; ALL-LABEL: @test3b(
95 ; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i16 @llvm.cttz.i16(i16 %A, i1 false)
96 ; SI-NEXT: ret i16 [[CTTZ]]
97 entry:
98 %tobool = icmp eq i16 %A, 0
99 br i1 %tobool, label %cond.end, label %cond.true ; bypass the cttz call when %A is zero
100
101 cond.true: ; preds = %entry
102 %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
103 br label %cond.end
104
105 cond.end: ; preds = %entry, %cond.true
106 %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ] ; the zero path yields 16, the bit width of i16
107 ret i16 %cond
108 }
109
110
111 define i64 @test1c(i64 %A) { ; negative case for i64 ctlz; the checks expect the icmp and the i1 true call to remain
112 ; ALL-LABEL: @test1c(
113 ; ALL: icmp eq i64 %A, 0
114 ; ALL: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
115 entry:
116 %tobool = icmp eq i64 %A, 0
117 br i1 %tobool, label %cond.end, label %cond.true ; bypass the ctlz call when %A is zero
118
119 cond.true: ; preds = %entry
120 %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
121 br label %cond.end
122
123 cond.end: ; preds = %entry, %cond.true
124 %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ] ; the zero path yields 63, one less than the i64 bit width
125 ret i64 %cond
126 }
127
128 define i32 @test2c(i32 %A) { ; negative case for i32 ctlz; the checks expect the icmp and the i1 true call to remain
129 ; ALL-LABEL: @test2c(
130 ; ALL: icmp eq i32 %A, 0
131 ; ALL: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
132 entry:
133 %tobool = icmp eq i32 %A, 0
134 br i1 %tobool, label %cond.end, label %cond.true ; bypass the ctlz call when %A is zero
135
136 cond.true: ; preds = %entry
137 %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
138 br label %cond.end
139
140 cond.end: ; preds = %entry, %cond.true
141 %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ] ; the zero path yields 31, one less than the i32 bit width
142 ret i32 %cond
143 }
144
145
146 define signext i16 @test3c(i16 signext %A) { ; negative case for i16 ctlz; the checks expect the icmp and the i1 true call to remain
147 ; ALL-LABEL: @test3c(
148 ; ALL: icmp eq i16 %A, 0
149 ; ALL: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
150 entry:
151 %tobool = icmp eq i16 %A, 0
152 br i1 %tobool, label %cond.end, label %cond.true ; bypass the ctlz call when %A is zero
153
154 cond.true: ; preds = %entry
155 %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
156 br label %cond.end
157
158 cond.end: ; preds = %entry, %cond.true
159 %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ] ; the zero path yields 15, one less than the i16 bit width
160 ret i16 %cond
161 }
162
163
164 define i64 @test1d(i64 %A) { ; negative case for i64 cttz; the checks expect the icmp and the i1 true call to remain
165 ; ALL-LABEL: @test1d(
166 ; ALL: icmp eq i64 %A, 0
167 ; ALL: call i64 @llvm.cttz.i64(i64 %A, i1 true)
168 entry:
169 %tobool = icmp eq i64 %A, 0
170 br i1 %tobool, label %cond.end, label %cond.true ; bypass the cttz call when %A is zero
171
172 cond.true: ; preds = %entry
173 %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
174 br label %cond.end
175
176 cond.end: ; preds = %entry, %cond.true
177 %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ] ; the zero path yields 63, one less than the i64 bit width
178 ret i64 %cond
179 }
180
181
182 define i32 @test2d(i32 %A) { ; negative case for i32 cttz; the checks expect the icmp and the i1 true call to remain
183 ; ALL-LABEL: @test2d(
184 ; ALL: icmp eq i32 %A, 0
185 ; ALL: call i32 @llvm.cttz.i32(i32 %A, i1 true)
186 entry:
187 %tobool = icmp eq i32 %A, 0
188 br i1 %tobool, label %cond.end, label %cond.true ; bypass the cttz call when %A is zero
189
190 cond.true: ; preds = %entry
191 %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
192 br label %cond.end
193
194 cond.end: ; preds = %entry, %cond.true
195 %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ] ; the zero path yields 31, one less than the i32 bit width
196 ret i32 %cond
197 }
198
199
200 define signext i16 @test3d(i16 signext %A) { ; negative case for i16 cttz; the checks expect the icmp and the i1 true call to remain
201 ; ALL-LABEL: @test3d(
202 ; ALL: icmp eq i16 %A, 0
203 ; ALL: call i16 @llvm.cttz.i16(i16 %A, i1 true)
204 entry:
205 %tobool = icmp eq i16 %A, 0
206 br i1 %tobool, label %cond.end, label %cond.true ; bypass the cttz call when %A is zero
207
208 cond.true: ; preds = %entry
209 %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
210 br label %cond.end
211
212 cond.end: ; preds = %entry, %cond.true
213 %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ] ; the zero path yields 15, one less than the i16 bit width
214 ret i16 %cond
215 }
216
217
218 declare i64 @llvm.ctlz.i64(i64, i1) ; ctlz intrinsics called by the tests, one per tested width (i64, i32, i16)
219 declare i32 @llvm.ctlz.i32(i32, i1)
220 declare i16 @llvm.ctlz.i16(i16, i1)
221 declare i64 @llvm.cttz.i64(i64, i1) ; cttz intrinsics called by the tests, one per tested width (i64, i32, i16)
222 declare i32 @llvm.cttz.i32(i32, i1)
223 declare i16 @llvm.cttz.i16(i16, i1)