llvm.org GIT mirror llvm / 8750be5
AMDGPU: Fix packing undef parts of build_vector

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@339511 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, 1 year, 6 months ago
5 changed file(s) with 422 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
42864286 }
42874287
42884288 assert(VT == MVT::v2f16 || VT == MVT::v2i16);
4289 assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
42894290
42904291 SDValue Lo = Op.getOperand(0);
42914292 SDValue Hi = Op.getOperand(1);
42924293
4293 Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
4294 // Avoid adding defined bits with the zero_extend.
4295 if (Hi.isUndef()) {
4296 Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
4297 SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
4298 return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
4299 }
4300
42944301 Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
4295
4296 Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
42974302 Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
42984303
42994304 SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
43004305 DAG.getConstant(16, SL, MVT::i32));
4306 if (Lo.isUndef())
4307 return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
4308
4309 Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
4310 Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
43014311
43024312 SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
4303
43044313 return DAG.getNode(ISD::BITCAST, SL, VT, Or);
43054314 }
43064315
14601460 def : ExpPattern<AMDGPUexport, i32, EXP>;
14611461 def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
14621462
1463 // COPY_TO_REGCLASS is workaround tablegen bug from multiple outputs
1463 // COPY is workaround tablegen bug from multiple outputs
14641464 // from S_LSHL_B32's multiple outputs from implicit scc def.
14651465 def : GCNPat <
14661466 (v2i16 (build_vector (i16 0), i16:$src1)),
1467 (v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0))
1468 >;
1469
1467 (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
1468 >;
1469
1470 def : GCNPat <
1471 (v2i16 (build_vector i16:$src0, (i16 undef))),
1472 (v2i16 (COPY $src0))
1473 >;
1474
1475 def : GCNPat <
1476 (v2f16 (build_vector f16:$src0, (f16 undef))),
1477 (v2f16 (COPY $src0))
1478 >;
1479
1480 def : GCNPat <
1481 (v2i16 (build_vector (i16 undef), i16:$src1)),
1482 (v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
1483 >;
1484
1485 def : GCNPat <
1486 (v2f16 (build_vector (f16 undef), f16:$src1)),
1487 (v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
1488 >;
14701489
14711490 let SubtargetPredicate = HasVOP3PInsts in {
14721491 def : GCNPat <
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
3
4 define void @undef_lo_v2i16(i16 %arg0) {
5 ; GFX9-LABEL: undef_lo_v2i16:
6 ; GFX9: ; %bb.0:
7 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
9 ; GFX9-NEXT: ;;#ASMSTART
10 ; GFX9-NEXT: ; use v0
11 ; GFX9-NEXT: ;;#ASMEND
12 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13 ;
14 ; GFX8-LABEL: undef_lo_v2i16:
15 ; GFX8: ; %bb.0:
16 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
18 ; GFX8-NEXT: ;;#ASMSTART
19 ; GFX8-NEXT: ; use v0
20 ; GFX8-NEXT: ;;#ASMEND
21 ; GFX8-NEXT: s_setpc_b64 s[30:31]
22 %undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
23 call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.lo);
24 ret void
25 }
26
27 define void @undef_lo_v2f16(half %arg0) {
28 ; GFX9-LABEL: undef_lo_v2f16:
29 ; GFX9: ; %bb.0:
30 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
32 ; GFX9-NEXT: ;;#ASMSTART
33 ; GFX9-NEXT: ; use v0
34 ; GFX9-NEXT: ;;#ASMEND
35 ; GFX9-NEXT: s_setpc_b64 s[30:31]
36 ;
37 ; GFX8-LABEL: undef_lo_v2f16:
38 ; GFX8: ; %bb.0:
39 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
41 ; GFX8-NEXT: ;;#ASMSTART
42 ; GFX8-NEXT: ; use v0
43 ; GFX8-NEXT: ;;#ASMEND
44 ; GFX8-NEXT: s_setpc_b64 s[30:31]
45 %undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
46 call void asm sideeffect "; use $0", "v"(<2 x half> %undef.lo);
47 ret void
48 }
49
50 define void @undef_lo_op_v2f16(half %arg0) {
51 ; GFX9-LABEL: undef_lo_op_v2f16:
52 ; GFX9: ; %bb.0:
53 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
55 ; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
56 ; GFX9-NEXT: ;;#ASMSTART
57 ; GFX9-NEXT: ; use v0
58 ; GFX9-NEXT: ;;#ASMEND
59 ; GFX9-NEXT: s_setpc_b64 s[30:31]
60 ;
61 ; GFX8-LABEL: undef_lo_op_v2f16:
62 ; GFX8: ; %bb.0:
63 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x3c00
65 ; GFX8-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
66 ; GFX8-NEXT: v_or_b32_e32 v0, 0x7e00, v0
67 ; GFX8-NEXT: ;;#ASMSTART
68 ; GFX8-NEXT: ; use v0
69 ; GFX8-NEXT: ;;#ASMEND
70 ; GFX8-NEXT: s_setpc_b64 s[30:31]
71 %undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
72 %op = fadd <2 x half> %undef.lo, <half 1.0, half 1.0>
73 call void asm sideeffect "; use $0", "v"(<2 x half> %op);
74 ret void
75 }
76
77 define void @undef_lo_op_v2i16(i16 %arg0) {
78 ; GFX9-LABEL: undef_lo_op_v2i16:
79 ; GFX9: ; %bb.0:
80 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
82 ; GFX9-NEXT: s_movk_i32 s6, 0x63
83 ; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
84 ; GFX9-NEXT: ;;#ASMSTART
85 ; GFX9-NEXT: ; use v0
86 ; GFX9-NEXT: ;;#ASMEND
87 ; GFX9-NEXT: s_setpc_b64 s[30:31]
88 ;
89 ; GFX8-LABEL: undef_lo_op_v2i16:
90 ; GFX8: ; %bb.0:
91 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x63
93 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
94 ; GFX8-NEXT: ;;#ASMSTART
95 ; GFX8-NEXT: ; use v0
96 ; GFX8-NEXT: ;;#ASMEND
97 ; GFX8-NEXT: s_setpc_b64 s[30:31]
98 %undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
99 %op = add <2 x i16> %undef.lo, <i16 99, i16 99>
100 call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
101 ret void
102 }
103
104 define void @undef_lo3_v4i16(i16 %arg0) {
105 ; GFX9-LABEL: undef_lo3_v4i16:
106 ; GFX9: ; %bb.0:
107 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
109 ; GFX9-NEXT: ;;#ASMSTART
110 ; GFX9-NEXT: ; use v[0:1]
111 ; GFX9-NEXT: ;;#ASMEND
112 ; GFX9-NEXT: s_setpc_b64 s[30:31]
113 ;
114 ; GFX8-LABEL: undef_lo3_v4i16:
115 ; GFX8: ; %bb.0:
116 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
118 ; GFX8-NEXT: ;;#ASMSTART
119 ; GFX8-NEXT: ; use v[0:1]
120 ; GFX8-NEXT: ;;#ASMEND
121 ; GFX8-NEXT: s_setpc_b64 s[30:31]
122 %undef.lo = insertelement <4 x i16> undef, i16 %arg0, i32 1
123 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
124 ret void
125 }
126
127 define void @undef_lo3_v4f16(half %arg0) {
128 ; GFX9-LABEL: undef_lo3_v4f16:
129 ; GFX9: ; %bb.0:
130 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
132 ; GFX9-NEXT: ;;#ASMSTART
133 ; GFX9-NEXT: ; use v[0:1]
134 ; GFX9-NEXT: ;;#ASMEND
135 ; GFX9-NEXT: s_setpc_b64 s[30:31]
136 ;
137 ; GFX8-LABEL: undef_lo3_v4f16:
138 ; GFX8: ; %bb.0:
139 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
141 ; GFX8-NEXT: ;;#ASMSTART
142 ; GFX8-NEXT: ; use v[0:1]
143 ; GFX8-NEXT: ;;#ASMEND
144 ; GFX8-NEXT: s_setpc_b64 s[30:31]
145 %undef.lo = insertelement <4 x half> undef, half %arg0, i32 1
146 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
147 ret void
148 }
149
150 define void @undef_lo2_v4i16(<2 x i16> %arg0) {
151 ; GFX9-LABEL: undef_lo2_v4i16:
152 ; GFX9: ; %bb.0:
153 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
155 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff0000
156 ; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
157 ; GFX9-NEXT: ;;#ASMSTART
158 ; GFX9-NEXT: ; use v[0:1]
159 ; GFX9-NEXT: ;;#ASMEND
160 ; GFX9-NEXT: s_setpc_b64 s[30:31]
161 ;
162 ; GFX8-LABEL: undef_lo2_v4i16:
163 ; GFX8: ; %bb.0:
164 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
166 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
167 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
168 ; GFX8-NEXT: ;;#ASMSTART
169 ; GFX8-NEXT: ; use v[0:1]
170 ; GFX8-NEXT: ;;#ASMEND
171 ; GFX8-NEXT: s_setpc_b64 s[30:31]
172 %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
173 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
174 ret void
175 }
176
177 define void @undef_lo2_v4f16(<2 x half> %arg0) {
178 ; GFX9-LABEL: undef_lo2_v4f16:
179 ; GFX9: ; %bb.0:
180 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
182 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0
183 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
184 ; GFX9-NEXT: ;;#ASMSTART
185 ; GFX9-NEXT: ; use v[0:1]
186 ; GFX9-NEXT: ;;#ASMEND
187 ; GFX9-NEXT: s_setpc_b64 s[30:31]
188 ;
189 ; GFX8-LABEL: undef_lo2_v4f16:
190 ; GFX8: ; %bb.0:
191 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
193 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
194 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
195 ; GFX8-NEXT: ;;#ASMSTART
196 ; GFX8-NEXT: ; use v[0:1]
197 ; GFX8-NEXT: ;;#ASMEND
198 ; GFX8-NEXT: s_setpc_b64 s[30:31]
199 %undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
200 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
201 ret void
202 }
203
204 define void @undef_hi_v2i16(i16 %arg0) {
205 ; GFX9-LABEL: undef_hi_v2i16:
206 ; GFX9: ; %bb.0:
207 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208 ; GFX9-NEXT: ;;#ASMSTART
209 ; GFX9-NEXT: ; use v0
210 ; GFX9-NEXT: ;;#ASMEND
211 ; GFX9-NEXT: s_setpc_b64 s[30:31]
212 ;
213 ; GFX8-LABEL: undef_hi_v2i16:
214 ; GFX8: ; %bb.0:
215 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216 ; GFX8-NEXT: ;;#ASMSTART
217 ; GFX8-NEXT: ; use v0
218 ; GFX8-NEXT: ;;#ASMEND
219 ; GFX8-NEXT: s_setpc_b64 s[30:31]
220 %undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
221 call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.hi);
222 ret void
223 }
224
225 define void @undef_hi_v2f16(half %arg0) {
226 ; GFX9-LABEL: undef_hi_v2f16:
227 ; GFX9: ; %bb.0:
228 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229 ; GFX9-NEXT: ;;#ASMSTART
230 ; GFX9-NEXT: ; use v0
231 ; GFX9-NEXT: ;;#ASMEND
232 ; GFX9-NEXT: s_setpc_b64 s[30:31]
233 ;
234 ; GFX8-LABEL: undef_hi_v2f16:
235 ; GFX8: ; %bb.0:
236 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237 ; GFX8-NEXT: ;;#ASMSTART
238 ; GFX8-NEXT: ; use v0
239 ; GFX8-NEXT: ;;#ASMEND
240 ; GFX8-NEXT: s_setpc_b64 s[30:31]
241 %undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
242 call void asm sideeffect "; use $0", "v"(<2 x half> %undef.hi);
243 ret void
244 }
245
246 define void @undef_hi_op_v2f16(half %arg0) {
247 ; GFX9-LABEL: undef_hi_op_v2f16:
248 ; GFX9: ; %bb.0:
249 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250 ; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
251 ; GFX9-NEXT: ;;#ASMSTART
252 ; GFX9-NEXT: ; use v0
253 ; GFX9-NEXT: ;;#ASMEND
254 ; GFX9-NEXT: s_setpc_b64 s[30:31]
255 ;
256 ; GFX8-LABEL: undef_hi_op_v2f16:
257 ; GFX8: ; %bb.0:
258 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259 ; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
260 ; GFX8-NEXT: v_or_b32_e32 v0, 0x7e000000, v0
261 ; GFX8-NEXT: ;;#ASMSTART
262 ; GFX8-NEXT: ; use v0
263 ; GFX8-NEXT: ;;#ASMEND
264 ; GFX8-NEXT: s_setpc_b64 s[30:31]
265 %undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
266 %op = fadd <2 x half> %undef.hi, <half 1.0, half 1.0>
267 call void asm sideeffect "; use $0", "v"(<2 x half> %op);
268 ret void
269 }
270
271 define void @undef_hi_op_v2i16(i16 %arg0) {
272 ; GFX9-LABEL: undef_hi_op_v2i16:
273 ; GFX9: ; %bb.0:
274 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275 ; GFX9-NEXT: s_movk_i32 s6, 0x63
276 ; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
277 ; GFX9-NEXT: ;;#ASMSTART
278 ; GFX9-NEXT: ; use v0
279 ; GFX9-NEXT: ;;#ASMEND
280 ; GFX9-NEXT: s_setpc_b64 s[30:31]
281 ;
282 ; GFX8-LABEL: undef_hi_op_v2i16:
283 ; GFX8: ; %bb.0:
284 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285 ; GFX8-NEXT: v_add_u16_e32 v0, 0x63, v0
286 ; GFX8-NEXT: ;;#ASMSTART
287 ; GFX8-NEXT: ; use v0
288 ; GFX8-NEXT: ;;#ASMEND
289 ; GFX8-NEXT: s_setpc_b64 s[30:31]
290 %undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
291 %op = add <2 x i16> %undef.hi, <i16 99, i16 99>
292 call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
293 ret void
294 }
295
296 define void @undef_hi3_v4i16(i16 %arg0) {
297 ; GFX9-LABEL: undef_hi3_v4i16:
298 ; GFX9: ; %bb.0:
299 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
300 ; GFX9-NEXT: ;;#ASMSTART
301 ; GFX9-NEXT: ; use v[0:1]
302 ; GFX9-NEXT: ;;#ASMEND
303 ; GFX9-NEXT: s_setpc_b64 s[30:31]
304 ;
305 ; GFX8-LABEL: undef_hi3_v4i16:
306 ; GFX8: ; %bb.0:
307 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; GFX8-NEXT: ;;#ASMSTART
309 ; GFX8-NEXT: ; use v[0:1]
310 ; GFX8-NEXT: ;;#ASMEND
311 ; GFX8-NEXT: s_setpc_b64 s[30:31]
312 %undef.hi = insertelement <4 x i16> undef, i16 %arg0, i32 0
313 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
314 ret void
315 }
316
317 define void @undef_hi3_v4f16(half %arg0) {
318 ; GFX9-LABEL: undef_hi3_v4f16:
319 ; GFX9: ; %bb.0:
320 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
321 ; GFX9-NEXT: ;;#ASMSTART
322 ; GFX9-NEXT: ; use v[0:1]
323 ; GFX9-NEXT: ;;#ASMEND
324 ; GFX9-NEXT: s_setpc_b64 s[30:31]
325 ;
326 ; GFX8-LABEL: undef_hi3_v4f16:
327 ; GFX8: ; %bb.0:
328 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329 ; GFX8-NEXT: ;;#ASMSTART
330 ; GFX8-NEXT: ; use v[0:1]
331 ; GFX8-NEXT: ;;#ASMEND
332 ; GFX8-NEXT: s_setpc_b64 s[30:31]
333 %undef.hi = insertelement <4 x half> undef, half %arg0, i32 0
334 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
335 ret void
336 }
337
338 define void @undef_hi2_v4i16(<2 x i16> %arg0) {
339 ; GFX9-LABEL: undef_hi2_v4i16:
340 ; GFX9: ; %bb.0:
341 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342 ; GFX9-NEXT: ;;#ASMSTART
343 ; GFX9-NEXT: ; use v[0:1]
344 ; GFX9-NEXT: ;;#ASMEND
345 ; GFX9-NEXT: s_setpc_b64 s[30:31]
346 ;
347 ; GFX8-LABEL: undef_hi2_v4i16:
348 ; GFX8: ; %bb.0:
349 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350 ; GFX8-NEXT: ;;#ASMSTART
351 ; GFX8-NEXT: ; use v[0:1]
352 ; GFX8-NEXT: ;;#ASMEND
353 ; GFX8-NEXT: s_setpc_b64 s[30:31]
354 %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
355 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
356 ret void
357 }
358
359 define void @undef_hi2_v4f16(<2 x half> %arg0) {
360 ; GFX9-LABEL: undef_hi2_v4f16:
361 ; GFX9: ; %bb.0:
362 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363 ; GFX9-NEXT: ;;#ASMSTART
364 ; GFX9-NEXT: ; use v[0:1]
365 ; GFX9-NEXT: ;;#ASMEND
366 ; GFX9-NEXT: s_setpc_b64 s[30:31]
367 ;
368 ; GFX8-LABEL: undef_hi2_v4f16:
369 ; GFX8: ; %bb.0:
370 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371 ; GFX8-NEXT: ;;#ASMSTART
372 ; GFX8-NEXT: ; use v[0:1]
373 ; GFX8-NEXT: ;;#ASMEND
374 ; GFX8-NEXT: s_setpc_b64 s[30:31]
375 %undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
376 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
377 ret void
378 }
379
401401 ; FIXME: materialize constant directly in VGPR
402402 ; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm:
403403 ; GFX9-DAG: s_mov_b32 [[K01:s[0-9]+]], 0x20001
404 ; GFX9-DAG: s_pack_ll_b32_b16 [[K23:s[0-9]+]], 3, s{{[0-9]+}}
404 ; GFX9-DAG: s_mov_b32 [[K2:s[0-9]+]], 3
405405 ; GFX9: v_mov_b32_e32 v0, [[K01]]
406 ; GFX9: v_mov_b32_e32 v1, [[K23]]
406 ; GFX9: v_mov_b32_e32 v1, [[K2]]
407407 ; GFX9: s_swappc_b64
408408 define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
409409 call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
8282 }
8383
8484 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
85 ; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
86 ; GFX9: v_cvt_f16_f32_e32 v0, v0
85 ; GCN: s_waitcnt
86 ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
87 ; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
88 ; GFX9-NEXT: s_setpc_b64
8789 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 {
8890 %src0.ext = fpext half %src0 to float
8991 %src1.ext = fpext half %src1 to float