llvm.org GIT mirror llvm / 7341697
AMDGPU: Introduce a flag to disable mul24 intrinsic formation git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369856 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 29 days ago
2 changed file(s) with 172 addition(s) and 55 deletion(s). Raw diff Collapse all Expand all
5454 cl::ReallyHidden,
5555 cl::init(true));
5656
57 static cl::opt<bool> UseMul24Intrin(
58 "amdgpu-codegenprepare-mul24",
59 cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
60 cl::ReallyHidden,
61 cl::init(true));
62
5763 class AMDGPUCodeGenPrepare : public FunctionPass,
5864 public InstVisitor<AMDGPUCodeGenPrepare, bool> {
5965 const GCNSubtarget *ST = nullptr;
878884 DA->isUniform(&I) && promoteUniformOpToI32(I))
879885 return true;
880886
881 if (replaceMulWithMul24(I))
887 if (UseMul24Intrin && replaceMulWithMul24(I))
882888 return true;
883889
884890 bool Changed = false;
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
11 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
22 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=fiji -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s
3 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=fiji -amdgpu-codegenprepare-mul24=0 -amdgpu-codegenprepare %s | FileCheck -check-prefix=DISABLED %s
34
45 define i16 @mul_i16(i16 %lhs, i16 %rhs) {
56 ; SI-LABEL: @mul_i16(
67 ; SI-NEXT: [[TMP1:%.*]] = zext i16 [[LHS:%.*]] to i32
78 ; SI-NEXT: [[TMP2:%.*]] = zext i16 [[RHS:%.*]] to i32
89 ; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
9 ; SI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
10 ; SI-NEXT: ret i16 [[TMP4]]
10 ; SI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i16
11 ; SI-NEXT: ret i16 [[MUL]]
1112 ;
1213 ; VI-LABEL: @mul_i16(
1314 ; VI-NEXT: [[MUL:%.*]] = mul i16 [[LHS:%.*]], [[RHS:%.*]]
1415 ; VI-NEXT: ret i16 [[MUL]]
16 ;
17 ; DISABLED-LABEL: @mul_i16(
18 ; DISABLED-NEXT: [[MUL:%.*]] = mul i16 [[LHS:%.*]], [[RHS:%.*]]
19 ; DISABLED-NEXT: ret i16 [[MUL]]
1520 ;
1621 %mul = mul i16 %lhs, %rhs
1722 ret i16 %mul
2328 ; SI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
2429 ; SI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
2530 ; SI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8
26 ; SI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
27 ; SI-NEXT: ret i32 [[TMP1]]
31 ; SI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
32 ; SI-NEXT: ret i32 [[MUL]]
2833 ;
2934 ; VI-LABEL: @smul24_i32(
3035 ; VI-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
3136 ; VI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
3237 ; VI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
3338 ; VI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8
34 ; VI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
35 ; VI-NEXT: ret i32 [[TMP1]]
39 ; VI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
40 ; VI-NEXT: ret i32 [[MUL]]
41 ;
42 ; DISABLED-LABEL: @smul24_i32(
43 ; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
44 ; DISABLED-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
45 ; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
46 ; DISABLED-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8
47 ; DISABLED-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
48 ; DISABLED-NEXT: ret i32 [[MUL]]
3649 ;
3750 %shl.lhs = shl i32 %lhs, 8
3851 %lhs24 = ashr i32 %shl.lhs, 8
5568 ; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
5669 ; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
5770 ; SI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
58 ; SI-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
59 ; SI-NEXT: ret <2 x i32> [[TMP8]]
71 ; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
72 ; SI-NEXT: ret <2 x i32> [[MUL]]
6073 ;
6174 ; VI-LABEL: @smul24_v2i32(
6275 ; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
7083 ; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
7184 ; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
7285 ; VI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
73 ; VI-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
74 ; VI-NEXT: ret <2 x i32> [[TMP8]]
86 ; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
87 ; VI-NEXT: ret <2 x i32> [[MUL]]
88 ;
89 ; DISABLED-LABEL: @smul24_v2i32(
90 ; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
91 ; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
92 ; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
93 ; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[LHS]], <i32 8, i32 8>
94 ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]]
95 ; DISABLED-NEXT: ret <2 x i32> [[MUL]]
7596 ;
7697 %shl.lhs = shl <2 x i32> %lhs, <i32 8, i32 8>
7798 %lhs24 = ashr <2 x i32> %shl.lhs, <i32 8, i32 8>
85106 ; SI-LABEL: @umul24_i32(
86107 ; SI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
87108 ; SI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
88 ; SI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
89 ; SI-NEXT: ret i32 [[TMP1]]
109 ; SI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
110 ; SI-NEXT: ret i32 [[MUL]]
90111 ;
91112 ; VI-LABEL: @umul24_i32(
92113 ; VI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
93114 ; VI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
94 ; VI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
95 ; VI-NEXT: ret i32 [[TMP1]]
115 ; VI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
116 ; VI-NEXT: ret i32 [[MUL]]
117 ;
118 ; DISABLED-LABEL: @umul24_i32(
119 ; DISABLED-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
120 ; DISABLED-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
121 ; DISABLED-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
122 ; DISABLED-NEXT: ret i32 [[MUL]]
96123 ;
97124 %lhs24 = and i32 %lhs, 16777215
98125 %rhs24 = and i32 %rhs, 16777215
111138 ; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
112139 ; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
113140 ; SI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
114 ; SI-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
115 ; SI-NEXT: ret <2 x i32> [[TMP8]]
141 ; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
142 ; SI-NEXT: ret <2 x i32> [[MUL]]
116143 ;
117144 ; VI-LABEL: @umul24_v2i32(
118145 ; VI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
124151 ; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
125152 ; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
126153 ; VI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
127 ; VI-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
128 ; VI-NEXT: ret <2 x i32> [[TMP8]]
154 ; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
155 ; VI-NEXT: ret <2 x i32> [[MUL]]
156 ;
157 ; DISABLED-LABEL: @umul24_v2i32(
158 ; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
159 ; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
160 ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]]
161 ; DISABLED-NEXT: ret <2 x i32> [[MUL]]
129162 ;
130163 %lhs24 = and <2 x i32> %lhs, <i32 16777215, i32 16777215>
131164 %rhs24 = and <2 x i32> %rhs, <i32 16777215, i32 16777215>
142175 ; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
143176 ; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
144177 ; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
145 ; SI-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
146 ; SI-NEXT: ret i64 [[TMP4]]
178 ; SI-NEXT: [[MUL:%.*]] = sext i32 [[TMP3]] to i64
179 ; SI-NEXT: ret i64 [[MUL]]
147180 ;
148181 ; VI-LABEL: @smul24_i64(
149182 ; VI-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
153186 ; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
154187 ; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
155188 ; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
156 ; VI-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
157 ; VI-NEXT: ret i64 [[TMP4]]
189 ; VI-NEXT: [[MUL:%.*]] = sext i32 [[TMP3]] to i64
190 ; VI-NEXT: ret i64 [[MUL]]
191 ;
192 ; DISABLED-LABEL: @smul24_i64(
193 ; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
194 ; DISABLED-NEXT: [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
195 ; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
196 ; DISABLED-NEXT: [[RHS24:%.*]] = ashr i64 [[LHS]], 40
197 ; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
198 ; DISABLED-NEXT: ret i64 [[MUL]]
158199 ;
159200 %shl.lhs = shl i64 %lhs, 40
160201 %lhs24 = ashr i64 %shl.lhs, 40
171212 ; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
172213 ; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
173214 ; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
174 ; SI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
175 ; SI-NEXT: ret i64 [[TMP4]]
215 ; SI-NEXT: [[MUL:%.*]] = zext i32 [[TMP3]] to i64
216 ; SI-NEXT: ret i64 [[MUL]]
176217 ;
177218 ; VI-LABEL: @umul24_i64(
178219 ; VI-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
180221 ; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
181222 ; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
182223 ; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
183 ; VI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
184 ; VI-NEXT: ret i64 [[TMP4]]
224 ; VI-NEXT: [[MUL:%.*]] = zext i32 [[TMP3]] to i64
225 ; VI-NEXT: ret i64 [[MUL]]
226 ;
227 ; DISABLED-LABEL: @umul24_i64(
228 ; DISABLED-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
229 ; DISABLED-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
230 ; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
231 ; DISABLED-NEXT: ret i64 [[MUL]]
185232 ;
186233 %lhs24 = and i64 %lhs, 16777215
187234 %rhs24 = and i64 %rhs, 16777215
198245 ; SI-NEXT: [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
199246 ; SI-NEXT: [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
200247 ; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
201 ; SI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
202 ; SI-NEXT: ret i31 [[TMP4]]
248 ; SI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i31
249 ; SI-NEXT: ret i31 [[MUL]]
203250 ;
204251 ; VI-LABEL: @smul24_i31(
205252 ; VI-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
209256 ; VI-NEXT: [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
210257 ; VI-NEXT: [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
211258 ; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
212 ; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
213 ; VI-NEXT: ret i31 [[TMP4]]
259 ; VI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i31
260 ; VI-NEXT: ret i31 [[MUL]]
261 ;
262 ; DISABLED-LABEL: @smul24_i31(
263 ; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
264 ; DISABLED-NEXT: [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
265 ; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
266 ; DISABLED-NEXT: [[RHS24:%.*]] = ashr i31 [[LHS]], 7
267 ; DISABLED-NEXT: [[MUL:%.*]] = mul i31 [[LHS24]], [[RHS24]]
268 ; DISABLED-NEXT: ret i31 [[MUL]]
214269 ;
215270 %shl.lhs = shl i31 %lhs, 7
216271 %lhs24 = ashr i31 %shl.lhs, 7
227282 ; SI-NEXT: [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
228283 ; SI-NEXT: [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
229284 ; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
230 ; SI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
231 ; SI-NEXT: ret i31 [[TMP4]]
285 ; SI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i31
286 ; SI-NEXT: ret i31 [[MUL]]
232287 ;
233288 ; VI-LABEL: @umul24_i31(
234289 ; VI-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
236291 ; VI-NEXT: [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
237292 ; VI-NEXT: [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
238293 ; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
239 ; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
240 ; VI-NEXT: ret i31 [[TMP4]]
294 ; VI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i31
295 ; VI-NEXT: ret i31 [[MUL]]
296 ;
297 ; DISABLED-LABEL: @umul24_i31(
298 ; DISABLED-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
299 ; DISABLED-NEXT: [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
300 ; DISABLED-NEXT: [[MUL:%.*]] = mul i31 [[LHS24]], [[RHS24]]
301 ; DISABLED-NEXT: ret i31 [[MUL]]
241302 ;
242303 %lhs24 = and i31 %lhs, 16777215
243304 %rhs24 = and i31 %rhs, 16777215
262323 ; SI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
263324 ; SI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
264325 ; SI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
265 ; SI-NEXT: [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
266 ; SI-NEXT: ret <2 x i31> [[TMP14]]
326 ; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
327 ; SI-NEXT: ret <2 x i31> [[MUL]]
267328 ;
268329 ; VI-LABEL: @umul24_v2i31(
269330 ; VI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
281342 ; VI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
282343 ; VI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
283344 ; VI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
284 ; VI-NEXT: [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
285 ; VI-NEXT: ret <2 x i31> [[TMP14]]
345 ; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
346 ; VI-NEXT: ret <2 x i31> [[MUL]]
347 ;
348 ; DISABLED-LABEL: @umul24_v2i31(
349 ; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
350 ; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
351 ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]]
352 ; DISABLED-NEXT: ret <2 x i31> [[MUL]]
286353 ;
287354 %lhs24 = and <2 x i31> %lhs, <i31 16777215, i31 16777215>
288355 %rhs24 = and <2 x i31> %rhs, <i31 16777215, i31 16777215>
309376 ; SI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
310377 ; SI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
311378 ; SI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
312 ; SI-NEXT: [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
313 ; SI-NEXT: ret <2 x i31> [[TMP14]]
379 ; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
380 ; SI-NEXT: ret <2 x i31> [[MUL]]
314381 ;
315382 ; VI-LABEL: @smul24_v2i31(
316383 ; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 7, i31 7>
330397 ; VI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
331398 ; VI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
332399 ; VI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
333 ; VI-NEXT: [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
334 ; VI-NEXT: ret <2 x i31> [[TMP14]]
400 ; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
401 ; VI-NEXT: ret <2 x i31> [[MUL]]
402 ;
403 ; DISABLED-LABEL: @smul24_v2i31(
404 ; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 7, i31 7>
405 ; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 7, i31 7>
406 ; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 7, i31 7>
407 ; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[LHS]], <i31 7, i31 7>
408 ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]]
409 ; DISABLED-NEXT: ret <2 x i31> [[MUL]]
335410 ;
336411 %shl.lhs = shl <2 x i31> %lhs, <i31 7, i31 7>
337412 %lhs24 = ashr <2 x i31> %shl.lhs, <i31 7, i31 7>
350425 ; SI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
351426 ; SI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
352427 ; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
353 ; SI-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i33
354 ; SI-NEXT: ret i33 [[TMP4]]
428 ; SI-NEXT: [[MUL:%.*]] = sext i32 [[TMP3]] to i33
429 ; SI-NEXT: ret i33 [[MUL]]
355430 ;
356431 ; VI-LABEL: @smul24_i33(
357432 ; VI-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
361436 ; VI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
362437 ; VI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
363438 ; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
364 ; VI-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i33
365 ; VI-NEXT: ret i33 [[TMP4]]
439 ; VI-NEXT: [[MUL:%.*]] = sext i32 [[TMP3]] to i33
440 ; VI-NEXT: ret i33 [[MUL]]
441 ;
442 ; DISABLED-LABEL: @smul24_i33(
443 ; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
444 ; DISABLED-NEXT: [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
445 ; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
446 ; DISABLED-NEXT: [[RHS24:%.*]] = ashr i33 [[LHS]], 9
447 ; DISABLED-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
448 ; DISABLED-NEXT: ret i33 [[MUL]]
366449 ;
367450 %shl.lhs = shl i33 %lhs, 9
368451 %lhs24 = ashr i33 %shl.lhs, 9
379462 ; SI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
380463 ; SI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
381464 ; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
382 ; SI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i33
383 ; SI-NEXT: ret i33 [[TMP4]]
465 ; SI-NEXT: [[MUL:%.*]] = zext i32 [[TMP3]] to i33
466 ; SI-NEXT: ret i33 [[MUL]]
384467 ;
385468 ; VI-LABEL: @umul24_i33(
386469 ; VI-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
388471 ; VI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
389472 ; VI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
390473 ; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
391 ; VI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i33
392 ; VI-NEXT: ret i33 [[TMP4]]
474 ; VI-NEXT: [[MUL:%.*]] = zext i32 [[TMP3]] to i33
475 ; VI-NEXT: ret i33 [[MUL]]
476 ;
477 ; DISABLED-LABEL: @umul24_i33(
478 ; DISABLED-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
479 ; DISABLED-NEXT: [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
480 ; DISABLED-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
481 ; DISABLED-NEXT: ret i33 [[MUL]]
393482 ;
394483 %lhs24 = and i33 %lhs, 16777215
395484 %rhs24 = and i33 %rhs, 16777215
414503 ; VI-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
415504 ; VI-NEXT: ret i32 [[MUL]]
416505 ;
506 ; DISABLED-LABEL: @smul25_i32(
507 ; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 7
508 ; DISABLED-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 7
509 ; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 7
510 ; DISABLED-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 7
511 ; DISABLED-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
512 ; DISABLED-NEXT: ret i32 [[MUL]]
513 ;
417514 %shl.lhs = shl i32 %lhs, 7
418515 %lhs24 = ashr i32 %shl.lhs, 7
419516 %lshr.rhs = shl i32 %rhs, 7
434531 ; VI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 33554431
435532 ; VI-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
436533 ; VI-NEXT: ret i32 [[MUL]]
534 ;
535 ; DISABLED-LABEL: @umul25_i32(
536 ; DISABLED-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 33554431
537 ; DISABLED-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 33554431
538 ; DISABLED-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
539 ; DISABLED-NEXT: ret i32 [[MUL]]
437540 ;
438541 %lhs24 = and i32 %lhs, 33554431
439542 %rhs24 = and i32 %rhs, 33554431
460563 ; SI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
461564 ; SI-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
462565 ; SI-NEXT: [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
463 ; SI-NEXT: [[TMP14:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
464 ; SI-NEXT: ret <2 x i33> [[TMP14]]
566 ; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
567 ; SI-NEXT: ret <2 x i33> [[MUL]]
465568 ;
466569 ; VI-LABEL: @smul24_v2i33(
467570 ; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
481584 ; VI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
482585 ; VI-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
483586 ; VI-NEXT: [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
484 ; VI-NEXT: [[TMP14:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
485 ; VI-NEXT: ret <2 x i33> [[TMP14]]
587 ; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
588 ; VI-NEXT: ret <2 x i33> [[MUL]]
589 ;
590 ; DISABLED-LABEL: @smul24_v2i33(
591 ; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
592 ; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
593 ; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
594 ; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>
595 ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]]
596 ; DISABLED-NEXT: ret <2 x i33> [[MUL]]
486597 ;
487598 %shl.lhs = shl <2 x i33> %lhs, <i33 9, i33 9>
488599 %lhs24 = ashr <2 x i33> %shl.lhs, <i33 9, i33 9>