llvm.org GIT mirror llvm / 232c3d5
[AMDGPU][MC] Corrected several VI opcodes to avoid printing _e64
See bug 32936: https://bugs.llvm.org/show_bug.cgi?id=32936
Reviewers: artem.tamazov, vpykhtin
Differential Revision: https://reviews.llvm.org/D33123
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303070 91177308-0d34-0410-b5e6-96231b3b80d8
Dmitry Preobrazhensky, 3 years ago
9 changed file(s) with 116 addition(s) and 105 deletion(s). Raw diff Collapse all Expand all
656656 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
657657 }
658658
659 multiclass VOP2_Real_e64only_vi <bits<10> op> {
660 def _e64_vi :
661 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
662 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
663 // Hack to stop printing _e64
664 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME#"_e64");
665 let OutOperandList = (outs VGPR_32:$vdst);
666 let AsmString = ps.Mnemonic # " " # ps.AsmOperands;
667 }
668 }
669
659670 multiclass Base_VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
660671 def _e64_vi :
661672 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
723734 defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
724735 defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
725736
726 defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>;
727 defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>;
728 defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>;
729 defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>;
730 defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>;
731 defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>;
732 defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>;
733 defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>;
734 defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>;
735 defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>;
736 defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>;
737 defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
738 defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>;
739 defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>;
740 defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>;
741 defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>;
742 defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
743 defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
744 defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
745 defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>;
746 defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>;
747 defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>;
737748
738749 defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>;
739750 defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>;
2424 }
2525
2626 ; GCN-LABEL: {{^}}fold_mi_v_or_0:
27 ; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
27 ; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
2828 ; GCN-NOT: [[RESULT]]
2929 ; GCN: buffer_store_dword [[RESULT]]
3030 define amdgpu_kernel void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
4949 }
5050
5151 ; GCN-LABEL: {{^}}fold_mi_v_xor_0:
52 ; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
52 ; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
5353 ; GCN-NOT: [[RESULT]]
5454 ; GCN: buffer_store_dword [[RESULT]]
5555 define amdgpu_kernel void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
8585 }
8686
8787 ; GCN-LABEL: {{^}}fold_mi_v_not_0:
88 ; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
89 ; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
88 ; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
89 ; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
9090 ; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
9191 ; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
9292 ; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
103103 ; GCN: buffer_load_dwordx2
104104 ; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}
105105
106 ; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
107 ; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
106 ; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
107 ; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
108108 ; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
109109 ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]]
110110 ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
2424 ; XXX - Why 0 in register?
2525 ; FUNC-LABEL: {{^}}v_ctpop_i32:
2626 ; GCN: buffer_load_dword [[VAL:v[0-9]+]],
27 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
27 ; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 0
2828 ; GCN: buffer_store_dword [[RESULT]],
2929 ; GCN: s_endpgm
3030
3939 ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
4040 ; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
4141 ; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
42 ; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
42 ; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
4343 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
44 ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
44 ; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
4545 ; GCN: buffer_store_dword [[RESULT]],
4646 ; GCN: s_endpgm
4747
6060 ; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
6161 ; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
6262 ; GCN: s_waitcnt
63 ; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
63 ; GCN-NEXT: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
6464 ; GCN: buffer_store_dword [[RESULT]],
6565 ; GCN: s_endpgm
6666 define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
7272 }
7373
7474 ; FUNC-LABEL: {{^}}v_ctpop_v2i32:
75 ; GCN: v_bcnt_u32_b32_e64
76 ; GCN: v_bcnt_u32_b32_e64
75 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
76 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
7777 ; GCN: s_endpgm
7878
7979 ; EG: BCNT_INT
8686 }
8787
8888 ; FUNC-LABEL: {{^}}v_ctpop_v4i32:
89 ; GCN: v_bcnt_u32_b32_e64
90 ; GCN: v_bcnt_u32_b32_e64
91 ; GCN: v_bcnt_u32_b32_e64
92 ; GCN: v_bcnt_u32_b32_e64
89 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
90 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
91 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
92 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
9393 ; GCN: s_endpgm
9494
9595 ; EG: BCNT_INT
104104 }
105105
106106 ; FUNC-LABEL: {{^}}v_ctpop_v8i32:
107 ; GCN: v_bcnt_u32_b32_e64
108 ; GCN: v_bcnt_u32_b32_e64
109 ; GCN: v_bcnt_u32_b32_e64
110 ; GCN: v_bcnt_u32_b32_e64
111 ; GCN: v_bcnt_u32_b32_e64
112 ; GCN: v_bcnt_u32_b32_e64
113 ; GCN: v_bcnt_u32_b32_e64
114 ; GCN: v_bcnt_u32_b32_e64
107 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
108 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
109 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
110 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
111 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
112 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
113 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
114 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
115115 ; GCN: s_endpgm
116116
117117 ; EG: BCNT_INT
130130 }
131131
132132 ; FUNC-LABEL: {{^}}v_ctpop_v16i32:
133 ; GCN: v_bcnt_u32_b32_e64
134 ; GCN: v_bcnt_u32_b32_e64
135 ; GCN: v_bcnt_u32_b32_e64
136 ; GCN: v_bcnt_u32_b32_e64
137 ; GCN: v_bcnt_u32_b32_e64
138 ; GCN: v_bcnt_u32_b32_e64
139 ; GCN: v_bcnt_u32_b32_e64
140 ; GCN: v_bcnt_u32_b32_e64
141 ; GCN: v_bcnt_u32_b32_e64
142 ; GCN: v_bcnt_u32_b32_e64
143 ; GCN: v_bcnt_u32_b32_e64
144 ; GCN: v_bcnt_u32_b32_e64
145 ; GCN: v_bcnt_u32_b32_e64
146 ; GCN: v_bcnt_u32_b32_e64
147 ; GCN: v_bcnt_u32_b32_e64
148 ; GCN: v_bcnt_u32_b32_e64
133 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
134 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
135 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
136 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
137 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
138 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
139 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
140 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
141 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
142 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
143 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
144 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
145 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
146 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
147 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
148 ; GCN: v_bcnt_u32_b32{{(_e64)*}}
149149 ; GCN: s_endpgm
150150
151151 ; EG: BCNT_INT
173173
174174 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
175175 ; GCN: buffer_load_dword [[VAL:v[0-9]+]],
176 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
176 ; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
177177 ; GCN: buffer_store_dword [[RESULT]],
178178 ; GCN: s_endpgm
179179
188188
189189 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
190190 ; GCN: buffer_load_dword [[VAL:v[0-9]+]],
191 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
191 ; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
192192 ; GCN: buffer_store_dword [[RESULT]],
193193 ; GCN: s_endpgm
194194
205205 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
206206 ; GCN-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
207207 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
208 ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
208 ; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
209209 ; GCN: buffer_store_dword [[RESULT]],
210210 ; GCN: s_endpgm
211211 define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
219219 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
220220 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
221221 ; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
222 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
222 ; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
223223 ; GCN: buffer_store_dword [[RESULT]],
224224 ; GCN: s_endpgm
225225
235235 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
236236 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
237237 ; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
238 ; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
238 ; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
239239 ; GCN: buffer_store_dword [[RESULT]],
240240 ; GCN: s_endpgm
241241
252252 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}}
253253 ; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
254254 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
255 ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
255 ; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
256256 ; GCN: buffer_store_dword [[RESULT]],
257257 ; GCN: s_endpgm
258258
2525
2626 ; FUNC-LABEL: {{^}}v_ctpop_i64:
2727 ; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
28 ; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
28 ; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
2929 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
30 ; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
30 ; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
3131 ; GCN: buffer_store_dword [[RESULT]],
3232 ; GCN: s_endpgm
3333 define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
4040
4141 ; FUNC-LABEL: {{^}}v_ctpop_i64_user:
4242 ; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
43 ; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
43 ; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
4444 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
45 ; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
45 ; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
4646 ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
4747 ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}
4848 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
170170 ; FUNC-LABEL: {{^}}v_ctpop_i128:
171171 ; GCN: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
172172
173 ; GCN-DAG: v_bcnt_u32_b32_e64 [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0
174 ; GCN-DAG: v_bcnt_u32_b32{{_e32|_e64}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]]
173 ; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0
174 ; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]]
175175
176 ; GCN-DAG: v_bcnt_u32_b32_e64 [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0
177 ; GCN-DAG: v_bcnt_u32_b32{{_e32|_e64}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]]
176 ; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0
177 ; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]]
178178
179179 ; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]]
180180
66 ; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
77 ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
88 ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]]
9 ; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]]
9 ; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[X]], [[VY]]
1010 define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 {
1111 %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
1212 store <2 x half> %result, <2 x half> addrspace(1)* %out
1515
1616 ; GCN-LABEL: {{^}}s_cvt_pkrtz_samereg_v2f16_f32:
1717 ; GCN: s_load_dword [[X:s[0-9]+]]
18 ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[X]]
18 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
1919 define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float %x) #0 {
2020 %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x)
2121 store <2 x half> %result, <2 x half> addrspace(1)* %out
3838 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
3939 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
4040 ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
41 ; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], [[B]]
41 ; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[A]], [[B]]
4242 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
4343 %tid = call i32 @llvm.amdgcn.workitem.id.x()
4444 %tid.ext = sext i32 %tid to i64
5454
5555 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_reg_imm:
5656 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
57 ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], 1.0
57 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
5858 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
5959 %tid = call i32 @llvm.amdgcn.workitem.id.x()
6060 %tid.ext = sext i32 %tid to i64
6969 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg:
7070 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
7171 ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
72 ; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, 1.0, [[A]]
72 ; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, 1.0, [[A]]
7373 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
7474 %tid = call i32 @llvm.amdgcn.workitem.id.x()
7575 %tid.ext = sext i32 %tid to i64
8484 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo:
8585 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
8686 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
87 ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], [[B]]
87 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
8888 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
8989 %tid = call i32 @llvm.amdgcn.workitem.id.x()
9090 %tid.ext = sext i32 %tid to i64
102102 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_hi:
103103 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
104104 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
105 ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], -[[B]]
105 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
106106 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
107107 %tid = call i32 @llvm.amdgcn.workitem.id.x()
108108 %tid.ext = sext i32 %tid to i64
120120 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo_hi:
121121 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
122122 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
123 ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], -[[B]]
123 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
124124 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
125125 %tid = call i32 @llvm.amdgcn.workitem.id.x()
126126 %tid.ext = sext i32 %tid to i64
139139 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi:
140140 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
141141 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
142 ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -|[[A]]|, -[[B]]
142 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
143143 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
144144 %tid = call i32 @llvm.amdgcn.workitem.id.x()
145145 %tid.ext = sext i32 %tid to i64
11 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
22
33 ; GCN-LABEL: {{^}}mbcnt_intrinsics:
4 ; GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0
4 ; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[LO:v[0-9]+]], -1, 0
55 ; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
6 ; VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
6 ; VI: v_mbcnt_hi_u32_b32 {{v[0-9]+}}, -1, [[LO]]
77 define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3) {
88 main_body:
99 %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
242242 v_xor_b32_e32 v1, v2, v3
243243
244244 // SICI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00]
245 // VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
245 // VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
246246 v_bfm_b32_e64 v1, v2, v3
247247
248248 // SICI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]
258258 v_madak_f32 v1, v2, v3, 64.0
259259
260260 // SICI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
261 // VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
261 // VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
262262 v_bcnt_u32_b32_e64 v1, v2, v3
263263
264264 // SICI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x46,0xd2,0x02,0x07,0x02,0x00]
265 // VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
265 // VI: v_mbcnt_lo_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
266266 v_mbcnt_lo_u32_b32_e64 v1, v2, v3
267267
268268 // SICI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00]
269 // VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
269 // VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
270270 v_mbcnt_hi_u32_b32_e64 v1, v2, v3
271271
272272 // SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
375375 v_subbrev_u32 v1, s[0:1], v2, v3, vcc
376376
377377 // SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
378 // VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
378 // VI: v_ldexp_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
379379 v_ldexp_f32 v1, v2, v3
380380
381381 // SICI: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58]
382 // VI: v_cvt_pkaccum_u8_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00]
382 // VI: v_cvt_pkaccum_u8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00]
383383 v_cvt_pkaccum_u8_f32 v1, v2, v3
384384
385385 // SICI: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a]
386 // VI: v_cvt_pknorm_i16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00]
386 // VI: v_cvt_pknorm_i16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00]
387387 v_cvt_pknorm_i16_f32 v1, v2, v3
388388
389389 // SICI: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c]
390 // VI: v_cvt_pknorm_u16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00]
390 // VI: v_cvt_pknorm_u16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00]
391391 v_cvt_pknorm_u16_f32 v1, v2, v3
392392
393393 // SICI: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e]
394 // VI: v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00]
394 // VI: v_cvt_pkrtz_f16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00]
395395 v_cvt_pkrtz_f16_f32 v1, v2, v3
396396
397397 // SICI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x60,0xd2,0x02,0x07,0x02,0x00]
398 // VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
398 // VI: v_cvt_pk_u16_u32 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
399399 v_cvt_pk_u16_u32_e64 v1, v2, v3
400400
401401 // SICI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x62,0xd2,0x02,0x07,0x02,0x00]
402 // VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
402 // VI: v_cvt_pk_i16_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
403403 v_cvt_pk_i16_i32_e64 v1, v2, v3
404404
405405 // NOSICI: error: instruction not supported on this GPU
287287 v_xor_b32 v1, v2, v3
288288
289289 // SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
290 // VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
290 // VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
291291 v_bfm_b32 v1, v2, v3
292292
293293 // SICI: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44]
294 // VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
294 // VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
295295 v_bcnt_u32_b32 v1, v2, v3
296296
297297 // SICI: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46]
298 // VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
298 // VI: v_mbcnt_lo_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
299299 v_mbcnt_lo_u32_b32 v1, v2, v3
300300
301301 // SICI: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48]
302 // VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
302 // VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
303303 v_mbcnt_hi_u32_b32 v1, v2, v3
304304
305305 // SICI: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60]
306 // VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
306 // VI: v_cvt_pk_u16_u32 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
307307 v_cvt_pk_u16_u32 v1, v2, v3
308308
309309 // SICI: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62]
310 // VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
310 // VI: v_cvt_pk_i16_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
311311 v_cvt_pk_i16_i32 v1, v2, v3
312312
313313 // SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
314 // VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
314 // VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
315315 v_bfm_b32 v1, v2, v3
316316
317317 // NOSICI: error: instruction not supported on this GPU
7171 # VI: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a]
7272 0x02 0x07 0x02 0x2a
7373
74 # VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
74 # VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
7575 0x01 0x00 0x93 0xd2 0x02 0x07 0x02 0x00
7676
7777 # VI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
8383 # VI: v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
8484 0x02 0x07 0x02 0x30 0x00 0x00 0x80 0x42
8585
86 # VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
86 # VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
8787 0x01 0x00 0x8b 0xd2 0x02 0x07 0x02 0x00
8888
89 # VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
89 # VI: v_mbcnt_lo_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
9090 0x01 0x00 0x8c 0xd2 0x02 0x07 0x02 0x00
9191
92 # VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
92 # VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
9393 0x01 0x00 0x8d 0xd2 0x02 0x07 0x02 0x00
9494
9595 # VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
170170 # VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01]
171171 0x01 0x00 0x1e 0xd1 0x02 0x07 0xaa 0x01
172172
173 # VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
173 # VI: v_ldexp_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
174174 0x01 0x00 0x88 0xd2 0x02 0x07 0x02 0x00
175175
176 # VI: v_cvt_pkaccum_u8_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00]
176 # VI: v_cvt_pkaccum_u8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00]
177177 0x01 0x00 0xf0 0xd1 0x02 0x07 0x02 0x00
178178
179 # VI: v_cvt_pknorm_i16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00]
179 # VI: v_cvt_pknorm_i16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00]
180180 0x01 0x00 0x94 0xd2 0x02 0x07 0x02 0x00
181181
182 # VI: v_cvt_pknorm_u16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00]
182 # VI: v_cvt_pknorm_u16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00]
183183 0x01 0x00 0x95 0xd2 0x02 0x07 0x02 0x00
184184
185 # VI: v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00]
185 # VI: v_cvt_pkrtz_f16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00]
186186 0x01 0x00 0x96 0xd2 0x02 0x07 0x02 0x00
187187
188 # VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
188 # VI: v_cvt_pk_u16_u32 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
189189 0x01 0x00 0x97 0xd2 0x02 0x07 0x02 0x00
190190
191 # VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
191 # VI: v_cvt_pk_i16_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
192192 0x01 0x00 0x98 0xd2 0x02 0x07 0x02 0x00
193193
194194 # VI: v_add_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]