llvm.org GIT mirror llvm / 820985a
AMDGPU: Add sdst operand to VOP2b instructions. The VOP3 encoding of these allows any SGPR pair for the i1 output, but this was previously forced to always use vcc. This change doesn't yet try to use that flexibility, but it does add the operand to the definitions, so the main change is adding vcc to the output of the VOP2 encoding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246358 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 5 years ago
18 changed file(s) with 175 addition(s) and 118 deletion(s). Raw diff Collapse all Expand all
10901090
10911091 field dag Outs = (outs DstRC:$dst);
10921092
1093 // VOP3b instructions are a special case with a second explicit
1094 // output. This is manually overridden for them.
1095 field dag Outs32 = Outs;
1096 field dag Outs64 = Outs;
1097
10931098 field dag Ins32 = getIns32.ret;
10941099 field dag Ins64 = getIns64
10951100 HasModifiers>.ret;
11251130 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
11261131 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
11271132 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
1128 def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
1133
1134 class VOP2b_Profile : VOPProfile<[vt, vt, vt, untyped]> {
1135 let Asm32 = "$dst, vcc, $src0, $src1";
1136 let Asm64 = "$dst, $sdst, $src0, $src1";
1137 let Outs32 = (outs DstRC:$dst);
1138 let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
1139 }
1140
1141 def VOP2b_I32_I1_I32_I32 : VOP2b_Profile;
1142
1143 def VOP2b_I32_I1_I32_I32_VCC : VOP2b_Profile {
11291144 let Src0RC32 = VCSrc_32;
11301145 }
11311146
14211436 def "" : VOP3_Pseudo ,
14221437 VOP2_REV;
14231438
1424 // The VOP2 variant puts the carry out into VCC, the VOP3 variant
1425 // can write it into any SGPR. We currently don't use the carry out,
1426 // so for now hardcode it to VCC as well.
1427 let sdst = SIOperand.VCC, Defs = [VCC] in {
1428 def _si : VOP3b_Real_si ,
1429 VOP3DisableFields<1, 0, HasMods>;
1430
1431 def _vi : VOP3b_Real_vi ,
1432 VOP3DisableFields<1, 0, HasMods>;
1433 } // End sdst = SIOperand.VCC, Defs = [VCC]
1439 def _si : VOP3b_Real_si ,
1440 VOP3DisableFields<1, 0, HasMods>;
1441
1442 def _vi : VOP3b_Real_vi ,
1443 VOP3DisableFields<1, 0, HasMods>;
14341444 }
14351445
14361446 multiclass VOP3b_3_m
15611571 opName, revOp, P.HasModifiers>;
15621572 }
15631573
1564 multiclass VOP2b_Helper ,
1574 multiclass VOP2b_Helper 32, dag outs64,
15651575 dag ins32, string asm32, list pat32,
15661576 dag ins64, string asm64, list pat64,
15671577 string revOp, bit HasMods> {
15681578
1569 defm _e32 : VOP2_m , ins32, asm32, pat32, opName, revOp>;
1579 defm _e32 : VOP2_m 32, ins32, asm32, pat32, opName, revOp>;
15701580
15711581 defm _e64 : VOP3b_2_m
1572 outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
1582 outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods
15731583 >;
15741584 }
15751585
15761586 multiclass VOP2bInst
15771587 SDPatternOperator node = null_frag,
15781588 string revOp = opName> : VOP2b_Helper <
1579 op, opName, P.Outs,
1589 op, opName, P.Outs32, P.Outs64,
15801590 P.Ins32, P.Asm32, [],
15811591 P.Ins64, P.Asm64,
15821592 !if(P.HasModifiers,
15131513 // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
15141514 // but the VI instructions behave the same as the SI versions.
15151515 defm V_ADD_I32 : VOP2bInst , "v_add_i32",
1516 VOP_I32_I32_I32, add
1517 >;
1518 defm V_SUB_I32 : VOP2bInst , "v_sub_i32", VOP_I32_I32_I32>;
1516 VOP2b_I32_I1_I32_I32
1517 >;
1518 defm V_SUB_I32 : VOP2bInst , "v_sub_i32", VOP2b_I32_I1_I32_I32>;
15191519
15201520 defm V_SUBREV_I32 : VOP2bInst , "v_subrev_i32",
1521 VOP_I32_I32_I32, null_frag, "v_sub_i32"
1521 VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
15221522 >;
15231523
15241524 let Uses = [VCC] in { // Carry-in comes from VCC
15251525 defm V_ADDC_U32 : VOP2bInst , "v_addc_u32",
1526 VOP_I32_I32_I32_VCC
1526 VOP2b_I32_I1_I32_I32_VCC
15271527 >;
15281528 defm V_SUBB_U32 : VOP2bInst , "v_subb_u32",
1529 VOP_I32_I32_I32_VCC
1529 VOP2b_I32_I1_I32_I32_VCC
15301530 >;
15311531 defm V_SUBBREV_U32 : VOP2bInst , "v_subbrev_u32",
1532 VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
1532 VOP2b_I32_I1_I32_I32_VCC, null_frag, "v_subb_u32"
15331533 >;
15341534
15351535 } // End Uses = [VCC]
44 ;FUNC-LABEL: {{^}}test1:
55 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
66
7 ;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
7 ;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
88 ;SI-NOT: [[REG]]
99 ;SI: buffer_store_dword [[REG]],
1010 define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
2020 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2121 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2222
23 ;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
24 ;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
23 ;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
24 ;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
2525
2626 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
2727 %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
3838 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
3939 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
4040
41 ;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
42 ;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
43 ;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
44 ;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
41 ;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
42 ;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
43 ;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
44 ;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
4545
4646 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
4747 %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
1313 ; FIXME: We end up with zero argument for ADD, because
1414 ; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
1515 ; with the appropriate offset. We should fold this into the store.
16 ; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
16 ; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}}
1717 ; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
1818 ;
1919 ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
2121 ; to interpret:
2222 ; getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
2323
24 ; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
24 ; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16
2525 ; SI-PROMOTE: ds_write_b32 [[PTRREG]]
2626 define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
2727 %alloca = alloca [4 x i32], i32 4, align 16
153153
154154 ; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
155155 ; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
156 ; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
156 ; SI: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
157157 ; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
158158 ; SI: buffer_store_dword [[CONV]],
159159 define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
99 ; CHECK: BB0_1:
1010 ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
1111 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
12 ; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
12 ; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], vcc, 4, [[VADDR]]
1313 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
14 ; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
14 ; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]]
1515 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
16 ; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
16 ; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], vcc, 0x84, [[VADDR]]
1717 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
18 ; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
18 ; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]]
1919 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
2020
2121 ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
6464
6565 ; SI-LABEL: @simple_read2st64_f32_over_max_offset
6666 ; SI-NOT: ds_read2st64_b32
67 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
67 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
6868 ; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
6969 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
7070 ; SI: s_endpgm
196196
197197 ; SI-LABEL: @simple_read2st64_f64_over_max_offset
198198 ; SI-NOT: ds_read2st64_b64
199 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
199 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
200200 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
201201 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
202202 ; SI: s_endpgm
424424 ; SI: buffer_load_dword [[LOAD:v[0-9]+]]
425425 ; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
426426 ; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
427 ; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
427 ; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
428428 ; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
429429 ; SI: buffer_store_dword [[TMP2]]
430430 define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
2929 ; constant offsets.
3030 ; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
3131 ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
32 ; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
32 ; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], vcc, 16, v{{[0-9]+}}
3333 ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
3434 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
3535 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
0 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
11
22 ; CHECK-LABEL: {{^}}fold_sgpr:
3 ; CHECK: v_add_i32_e32 v{{[0-9]+}}, s
3 ; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
44 define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
55 entry:
66 %tmp0 = icmp ne i32 %fold, 0
5050
5151 ; GCN-LABEL: {{^}}legal_offset_fi_offset
5252 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
53 ; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
53 ; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8000
5454 ; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
5555
5656 define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
55
66 ; FUNC-LABEL: {{^}}shl_2_add_9_i32:
77 ; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
8 ; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
8 ; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
99 ; SI: buffer_store_dword [[RESULT]]
1010 ; SI: s_endpgm
1111 define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
1919 }
2020
2121 ; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
22 ; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
22 ; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], vcc, 9, {{v[0-9]+}}
2323 ; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
2424 ; SI-DAG: buffer_store_dword [[ADDREG]]
2525 ; SI-DAG: buffer_store_dword [[SHLREG]]
3939
4040 ; FUNC-LABEL: {{^}}shl_2_add_999_i32:
4141 ; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
42 ; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
42 ; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
4343 ; SI: buffer_store_dword [[RESULT]]
4444 ; SI: s_endpgm
4545 define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
3434 ; SI-LABEL: {{^}}load_shl_base_lds_1:
3535 ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
3636 ; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
37 ; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
37 ; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
3838 ; SI-DAG: buffer_store_dword [[RESULT]]
3939 ; SI-DAG: buffer_store_dword [[ADDUSE]]
4040 ; SI: s_endpgm
66 ; FUNC-LABEL: {{^}}test_sub_i32:
77 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
88
9 ; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
9 ; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
1010 define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
1111 %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
1212 %a = load i32, i32 addrspace(1)* %in
2121 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2222 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2323
24 ; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
25 ; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
24 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
25 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
2626
2727 define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
2828 %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
3939 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
4040 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
4141
42 ; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
43 ; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
44 ; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
45 ; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
42 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
43 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
44 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
45 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
4646
4747 define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
4848 %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
2929 ; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
3030 ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
3131 ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
32 ; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
32 ; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
3333 ; SI: v_cndmask_b32_e64
3434 ; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
35 ; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
36 ; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
35 ; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
36 ; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
3737 ; SI: v_cndmask_b32_e64
3838 ; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
3939 ; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
40 ; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
40 ; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
4141 ; SI-DAG: v_cndmask_b32_e64
4242 ; SI-DAG: v_cndmask_b32_e64
4343 ; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
44 ; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
44 ; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
4545 ; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
4646 ; SI-DAG: v_cndmask_b32_e64
4747 ; SI-DAG: v_cndmask_b32_e64
109109 ; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
110110 ; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
111111 ; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
112 ; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
112 ; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
113113 ; SI-DAG: v_cndmask_b32_e64
114114 ; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
115 ; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
116 ; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
115 ; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
116 ; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
117117 ; SI-DAG: v_cndmask_b32_e64
118118 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
119119 ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
120 ; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
120 ; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
121121 ; SI-DAG: v_cndmask_b32_e64
122122 ; SI-DAG: v_cndmask_b32_e64
123123 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
132132 ; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
133133 ; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
134134 ; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
135 ; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
135 ; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
136136 ; SI-DAG: v_cndmask_b32_e64
137137 ; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
138 ; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
139 ; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
138 ; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
139 ; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
140140 ; SI-DAG: v_cndmask_b32_e64
141141 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
142142 ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
143 ; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
143 ; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
144144 ; SI-DAG: v_cndmask_b32_e64
145145 ; SI-DAG: v_cndmask_b32_e64
146146 ; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
259259 ; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
260260 ; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
261261 ; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
262 ; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
262 ; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
263263 ; SI-DAG: v_cndmask_b32_e64
264264 ; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
265 ; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
266 ; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
265 ; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
266 ; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
267267 ; SI-DAG: v_cndmask_b32_e64
268268 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
269269 ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
270 ; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
270 ; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
271271 ; SI-DAG: v_cndmask_b32_e64
272272 ; SI-DAG: v_cndmask_b32_e64
273273 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
282282 ; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
283283 ; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
284284 ; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
285 ; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
285 ; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
286286 ; SI-DAG: v_cndmask_b32_e64
287287 ; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
288 ; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
289 ; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
288 ; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
289 ; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
290290 ; SI-DAG: v_cndmask_b32_e64
291291 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
292292 ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
293 ; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
293 ; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
294294 ; SI-DAG: v_cndmask_b32_e64
295295 ; SI-DAG: v_cndmask_b32_e64
296296 ; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
305305 ; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
306306 ; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
307307 ; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
308 ; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
308 ; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[THIRD_RCP_LO]]
309309 ; SI-DAG: v_cndmask_b32_e64
310310 ; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
311 ; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
312 ; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
311 ; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], vcc, [[THIRD_E]], [[THIRD_RCP]]
312 ; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], vcc, [[THIRD_E]], [[THIRD_RCP]]
313313 ; SI-DAG: v_cndmask_b32_e64
314314 ; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
315315 ; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
316 ; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
316 ; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], vcc, [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
317317 ; SI-DAG: v_cndmask_b32_e64
318318 ; SI-DAG: v_cndmask_b32_e64
319319 ; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
328328 ; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
329329 ; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
330330 ; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
331 ; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
331 ; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FOURTH_RCP_LO]]
332332 ; SI-DAG: v_cndmask_b32_e64
333333 ; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
334 ; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
335 ; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
334 ; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], vcc, [[FOURTH_E]], [[FOURTH_RCP]]
335 ; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], vcc, [[FOURTH_E]], [[FOURTH_RCP]]
336336 ; SI-DAG: v_cndmask_b32_e64
337337 ; SI: s_endpgm
338338 define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
22
33 ; Test that we correctly commute a sub instruction
44 ; FUNC-LABEL: {{^}}sub_rev:
5 ; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s
6 ; SI: v_subrev_i32_e32 v{{[0-9]+}}, s
5 ; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s
6 ; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s
77
88 ; ModuleID = 'vop-shrink.ll'
99
3131 v_mul_i32_i24_e64 v1, v2, 100
3232 // CHECK: error: invalid operand for instruction
3333
34 v_add_i32_e32 v1, s[0:1], v2, v3
35 // CHECK: error: invalid operand for instruction
36
3437 // TODO: Constant bus restrictions
250250 // VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
251251 v_mbcnt_hi_u32_b32 v1, v2, v3
252252
253 // SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
254 // VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
255 v_add_i32 v1, v2, v3
256
257 // SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
258 // VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
259 v_add_u32 v1, v2, v3
260
261 // SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
262 // VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
263 v_sub_i32 v1, v2, v3
264
265 // SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
266 // VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
267 v_sub_u32 v1, v2, v3
268
269 // SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
270 // VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
271 v_subrev_i32 v1, v2, v3
272
273 // SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
274 // VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
275 v_subrev_u32 v1, v2, v3
276
277 // SICI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
278 // VI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
279 v_addc_u32 v1, v2, v3
280
281 // SICI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
282 // VI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
283 v_subb_u32 v1, v2, v3
284
285 // SICI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
286 // VI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
287 v_subbrev_u32 v1, v2, v3
253 // SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
254 // VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
255 v_add_i32 v1, vcc, v2, v3
256
257 // SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
258 // VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
259 v_add_i32 v1, s[0:1], v2, v3
260
261 // SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
262 // VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
263 v_add_i32_e64 v1, s[0:1], v2, v3
264
265 // SICI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x4a,0xd2,0x02,0x07,0x02,0x00]
266 // VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
267 v_add_i32_e64 v1, vcc, v2, v3
268
269 // SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
270 // VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
271 v_add_u32 v1, vcc, v2, v3
272
273 // SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
274 // VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
275 v_add_u32 v1, s[0:1], v2, v3
276
277 // SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
278 // VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
279 v_sub_i32 v1, vcc, v2, v3
280
281 // SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
282 // VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
283 v_sub_i32 v1, s[0:1], v2, v3
284
285 // SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
286 // VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
287 v_sub_u32 v1, vcc, v2, v3
288
289 // SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
290 // VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
291 v_sub_u32 v1, s[0:1], v2, v3
292
293 // SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
294 // VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
295 v_subrev_i32 v1, vcc, v2, v3
296
297 // SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
298 // VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
299 v_subrev_i32 v1, s[0:1], v2, v3
300
301 // SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
302 // VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
303 v_subrev_u32 v1, vcc, v2, v3
304
305 // SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
306 // VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
307 v_subrev_u32 v1, s[0:1], v2, v3
308
309 // SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
310 // VI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
311 v_addc_u32 v1, vcc, v2, v3
312
313 // SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
314 // VI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00]
315 v_addc_u32 v1, s[0:1], v2, v3
316
317 // SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
318 // VI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
319 v_subb_u32 v1, vcc, v2, v3
320
321 // SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00]
322 // VI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00]
323 v_subb_u32 v1, s[0:1], v2, v3
324
325 // SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
326 // VI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
327 v_subbrev_u32 v1, vcc, v2, v3
328
329 // SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00]
330 // VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00]
331 v_subbrev_u32 v1, s[0:1], v2, v3
288332
289333 // SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
290334 // VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]