llvm.org GIT mirror llvm / 6bc93b9
[AMDGPU][MC][GFX8][GFX9] Corrected names of integer v_{add/addc/sub/subrev/subb/subbrev}
See bug 34765: https://bugs.llvm.org//show_bug.cgi?id=34765
Reviewers: tamazov, SamWot, arsenm, vpykhtin
Differential Revision: https://reviews.llvm.org/D40088
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318675 91177308-0d34-0410-b5e6-96231b3b80d8
Dmitry Preobrazhensky, 2 years ago
63 changed file(s) with 702 addition(s) and 597 deletion(s). Raw diff Collapse all Expand all
106106
107107 int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
108108 SIEncodingFamily Gen = subtargetEncodingFamily(ST);
109
110 if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
111 ST.getGeneration() >= AMDGPUSubtarget::GFX9)
112 Gen = SIEncodingFamily::GFX9;
113
109114 if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
110115 Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
111116 : SIEncodingFamily::SDWA;
112
113 if ((get(Opcode).TSFlags & SIInstrFlags::F16_ZFILL) != 0 &&
114 ST.getGeneration() >= AMDGPUSubtarget::GFX9)
115 Gen = SIEncodingFamily::GFX9;
116117
117118 int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
118119
211211 Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
212212 if (Res) break;
213213
214 Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
215 if (Res) break;
216
214217 if (Bytes.size() < 4) break;
215218 const uint64_t QW = ((uint64_t)eatBytes(Bytes) << 32) | DW;
216219 Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
6868 VOPAsmPrefer32Bit = UINT64_C(1) << 41,
6969 VOP3_OPSEL = UINT64_C(1) << 42,
7070 maybeAtomic = UINT64_C(1) << 43,
71 F16_ZFILL = UINT64_C(1) << 44,
71 renamedInGFX9 = UINT64_C(1) << 44,
7272
7373 // Is a clamp on FP type.
7474 FPClamp = UINT64_C(1) << 45,
9494 // Is it possible for this instruction to be atomic?
9595 field bit maybeAtomic = 0;
9696
97 // This bit indicates that this is a 16-bit instruction which zero-fills
98 // unused bits in dst. Note that new GFX9 opcodes preserve unused bits.
99 field bit F16_ZFILL = 0;
97 // This bit indicates that this is a VI instruction which is renamed
98 // in GFX9. Required for correct mapping from pseudo to MC.
99 field bit renamedInGFX9 = 0;
100100
101101 // This bit indicates that this has a floating point result type, so
102102 // the clamp modifier has floating point semantics.
163163 let TSFlags{42} = VOP3_OPSEL;
164164
165165 let TSFlags{43} = maybeAtomic;
166 let TSFlags{44} = F16_ZFILL;
166 let TSFlags{44} = renamedInGFX9;
167167
168168 let TSFlags{45} = FPClamp;
169169 let TSFlags{46} = IntClamp;
15271527 def : Int16Med3Pat;
15281528 def : Int16Med3Pat;
15291529 } // End Predicates = [isGFX9]
1530
1531 //============================================================================//
1532 // Assembler aliases
1533 //============================================================================//
1534
1535 multiclass NoCarryAlias
1536 Instruction Inst32NC, Instruction Inst64NC,
1537 Instruction Inst32CO, Instruction Inst64CO> {
1538 def : InstAlias
1539 (Inst32NC VGPR_32:$vdst, VSrc_b32:$src0, VGPR_32:$src1), 1000>,
1540 Requires<[HasAddNoCarryInsts]>;
1541
1542 def : InstAlias
1543 (Inst64NC VGPR_32:$vdst, VCSrc_b32:$src0, VCSrc_b32:$src1), -10>,
1544 Requires<[HasAddNoCarryInsts]>;
1545
1546 def : InstAlias
1547 (Inst32CO VGPR_32:$vdst, VSrc_b32:$src0, VGPR_32:$src1), 1000>,
1548 Requires<[HasAddNoCarryInsts]>;
1549
1550 def : InstAlias
1551 (Inst64CO VGPR_32:$vdst, SReg_64:$sdst, VSrc_b32:$src0, VGPR_32:$src1), -10>,
1552 Requires<[HasAddNoCarryInsts]>;
1553 }
1554
1555 // gfx9 made a mess of add instruction names. The existing add
1556 // instructions had _co added to their names, and their old names were
1557 // repurposed to a version without carry out.
1558 // TODO: Do we need SubtargetPredicates for MnemonicAliases?
1559 let Predicates = [HasAddNoCarryInsts] in {
1560 defm : NoCarryAlias<"v_add_u32", V_ADD_U32_e32_vi, V_ADD_U32_e64_vi,
1561 V_ADD_I32_e32_vi, V_ADD_I32_e64_vi>;
1562 defm : NoCarryAlias<"v_sub_u32", V_SUB_U32_e32_vi, V_SUB_U32_e64_vi,
1563 V_SUB_I32_e32_vi, V_SUB_I32_e64_vi>;
1564 defm : NoCarryAlias<"v_subrev_u32",
1565 V_SUBREV_U32_e32_vi, V_SUBREV_U32_e64_vi,
1566 V_SUBREV_I32_e32_vi, V_SUBREV_I32_e64_vi>;
1567 }
1568
1569 let Predicates = [NotHasAddNoCarryInsts] in {
1570 def : MnemonicAlias<"v_add_u32", "v_add_i32">;
1571 def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
1572 def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
1573 }
142142 VOPProfile P,
143143 SDPatternOperator node = null_frag,
144144 string revOp = opName,
145 bit GFX9Renamed = 0,
145146 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
146
147 let SchedRW = [Write32Bit, WriteSALU] in {
148 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
149 def _e32 : VOP2_Pseudo ,
150 Commutable_REV;
151
152 def _sdwa : VOP2_SDWA_Pseudo {
153 let AsmMatchConverter = "cvtSdwaVOP2b";
147 let renamedInGFX9 = GFX9Renamed in {
148 let SchedRW = [Write32Bit, WriteSALU] in {
149 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
150 def _e32 : VOP2_Pseudo ,
151 Commutable_REV;
152
153 def _sdwa : VOP2_SDWA_Pseudo {
154 let AsmMatchConverter = "cvtSdwaVOP2b";
155 }
154156 }
155 }
156
157 def _e64 : VOP3_Pseudo .ret>,
158 Commutable_REV;
157
158 def _e64 : VOP3_Pseudo .ret>,
159 Commutable_REV;
160 }
159161 }
160162 }
161163
277279
278280 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
279281 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
280 clampmod:$clamp, omod:$omod,
282 clampmod:$clamp,
281283 dst_sel:$dst_sel, dst_unused:$dst_unused,
282284 src0_sel:$src0_sel, src1_sel:$src1_sel);
283285
284286 let InsDPP = (ins DstRCDPP:$old,
285 Src0Mod:$src0_modifiers, Src0DPP:$src0,
286 Src1Mod:$src1_modifiers, Src1DPP:$src1,
287 Src0DPP:$src0,
288 Src1DPP:$src1,
287289 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
288290 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
289291 let HasExt = 1;
369371
371373 // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
371373 // but the VI instructions behave the same as the SI versions.
372 defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32>;
373 defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32>;
374 defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32">;
375 defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
376 defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
377 defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
374 defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_i32", 1>;
375 defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
376 defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
377 defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
378 defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
379 defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
378380
379381
380382 let SubtargetPredicate = HasAddNoCarryInsts in {
659661 // VI
660662 //===----------------------------------------------------------------------===//
661663
662 class VOP2_DPP op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
663 VOP_DPP {
664 class VOP2_DPP op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfile P = ps.Pfl> :
665 VOP_DPP {
664666 let Defs = ps.Defs;
665667 let Uses = ps.Uses;
666668 let SchedRW = ps.SchedRW;
711713 }
712714 }
713715
714 multiclass Base_VOP2be_Real_e32e64_vi op> : VOP2_Real_e32_vi {
715 def _e64_vi :
716 VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>,
717 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>;
718 }
719
720716 multiclass Base_VOP2_Real_e32e64_vi op> :
721717 VOP2_Real_e32_vi,
722718 VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
735731 VOP2_SDWA9Ae (NAME#"_sdwa").Pfl>;
736732 }
737733
738 multiclass VOP2be_Real_e32e64_vi op> :
739 Base_VOP2be_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real {
740 // For now left dpp only for asm/dasm
741 // TODO: add corresponding pseudo
742 def _dpp : VOP2_DPP(NAME#"_e32")>;
743 }
734 let AssemblerPredicates = [isVIOnly] in {
735
736 multiclass VOP2be_Real_e32e64_vi_only op, string OpName, string AsmName> {
737 def _e32_vi :
738 VOP2_Real(OpName#"_e32"), SIEncodingFamily.VI>,
739 VOP2e(OpName#"_e32").Pfl> {
740 VOP2_Pseudo ps = !cast(OpName#"_e32");
741 let AsmString = AsmName # ps.AsmOperands;
742 let DecoderNamespace = "VI";
743 }
744 def _e64_vi :
745 VOP3_Real(OpName#"_e64"), SIEncodingFamily.VI>,
746 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast(OpName#"_e64").Pfl> {
747 VOP3_Pseudo ps = !cast(OpName#"_e64");
748 let AsmString = AsmName # ps.AsmOperands;
749 let DecoderNamespace = "VI";
750 }
751 def _sdwa_vi :
752 VOP_SDWA_Real (OpName#"_sdwa")>,
753 VOP2_SDWAe (OpName#"_sdwa").Pfl> {
754 VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa");
755 let AsmString = AsmName # ps.AsmOperands;
756 }
757 def _dpp :
758 VOP2_DPP(OpName#"_e32"), AsmName>;
759 }
760 }
761
762 let AssemblerPredicates = [isGFX9] in {
763
764 multiclass VOP2be_Real_e32e64_gfx9 op, string OpName, string AsmName> {
765 def _e32_gfx9 :
766 VOP2_Real(OpName#"_e32"), SIEncodingFamily.GFX9>,
767 VOP2e(OpName#"_e32").Pfl> {
768 VOP2_Pseudo ps = !cast(OpName#"_e32");
769 let AsmString = AsmName # ps.AsmOperands;
770 let DecoderNamespace = "GFX9";
771 }
772 def _e64_gfx9 :
773 VOP3_Real(OpName#"_e64"), SIEncodingFamily.GFX9>,
774 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast(OpName#"_e64").Pfl> {
775 VOP3_Pseudo ps = !cast(OpName#"_e64");
776 let AsmString = AsmName # ps.AsmOperands;
777 let DecoderNamespace = "GFX9";
778 }
779 def _sdwa_gfx9 :
780 VOP_SDWA9_Real (OpName#"_sdwa")>,
781 VOP2_SDWA9Ae (OpName#"_sdwa").Pfl> {
782 VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa");
783 let AsmString = AsmName # ps.AsmOperands;
784 }
785 def _dpp_gfx9 :
786 VOP2_DPP(OpName#"_e32"), AsmName> {
787 let DecoderNamespace = "SDWA9";
788 }
789 }
790
791 multiclass VOP2_Real_e32e64_gfx9 op> {
792 def _e32_gfx9 :
793 VOP2_Real(NAME#"_e32"), SIEncodingFamily.GFX9>,
794 VOP2e(NAME#"_e32").Pfl>{
795 let DecoderNamespace = "GFX9";
796 }
797 def _e64_gfx9 :
798 VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX9>,
799 VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl> {
800 let DecoderNamespace = "GFX9";
801 }
802 def _sdwa_gfx9 :
803 VOP_SDWA9_Real (NAME#"_sdwa")>,
804 VOP2_SDWA9Ae (NAME#"_sdwa").Pfl> {
805 }
806 def _dpp_gfx9 :
807 VOP2_DPP(NAME#"_e32")> {
808 let DecoderNamespace = "SDWA9";
809 }
810 }
811
812 } // AssemblerPredicates = [isGFX9]
744813
745814 multiclass VOP2_Real_e32e64_vi op> :
746815 Base_VOP2_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real {
774843 defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
775844 defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
776845 defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
777 defm V_ADD_I32 : VOP2be_Real_e32e64_vi <0x19>;
778 defm V_SUB_I32 : VOP2be_Real_e32e64_vi <0x1a>;
779 defm V_SUBREV_I32 : VOP2be_Real_e32e64_vi <0x1b>;
780 defm V_ADDC_U32 : VOP2be_Real_e32e64_vi <0x1c>;
781 defm V_SUBB_U32 : VOP2be_Real_e32e64_vi <0x1d>;
782 defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>;
846
847 defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_I32", "v_add_u32">;
848 defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_I32", "v_sub_u32">;
849 defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_I32", "v_subrev_u32">;
850 defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
851 defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
852 defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;
853
854 defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_I32", "v_add_co_u32">;
855 defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_I32", "v_sub_co_u32">;
856 defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_I32", "v_subrev_co_u32">;
857 defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
858 defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
859 defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;
860
861 defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
862 defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
863 defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
783864
784865 defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
785866 defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
839920 def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
840921
841922 } // End SubtargetPredicate = isVI
842
843 let SubtargetPredicate = HasAddNoCarryInsts in {
844 defm V_ADD_U32 : VOP2_Real_e32e64_vi <0x34>;
845 defm V_SUB_U32 : VOP2_Real_e32e64_vi <0x35>;
846 defm V_SUBREV_U32 : VOP2_Real_e32e64_vi <0x36>;
847 }
409409
410410 let SubtargetPredicate = Has16BitInsts in {
411411
412 let F16_ZFILL = 1 in {
412 let renamedInGFX9 = 1 in {
413413 def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>;
414414 }
415415 let SubtargetPredicate = isGFX9 in {
418418
419419 let isCommutable = 1 in {
420420
421 let F16_ZFILL = 1 in {
421 let renamedInGFX9 = 1 in {
422422 def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>;
423423 def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>;
424424 def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>;
505505
506506 def V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile>;
507507 def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile>;
508
509 def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile>;
510 def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile>;
508511 } // End SubtargetPredicate = isGFX9
509512
510513 //===----------------------------------------------------------------------===//
702705 }
703706 }
704707
708 multiclass VOP3_Real_gfx9 op, string AsmName> {
709 def _vi : VOP3_Real(NAME), SIEncodingFamily.GFX9>,
710 VOP3e_vi (NAME).Pfl> {
711 VOP3_Pseudo ps = !cast(NAME);
712 let AsmString = AsmName # ps.AsmOperands;
713 }
714 }
715
705716 } // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9"
706717
707718 defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
768779 defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
769780 defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
770781
782 defm V_ADD_I32_gfx9 : VOP3_Real_gfx9 <0x29c, "v_add_i32">;
783 defm V_SUB_I32_gfx9 : VOP3_Real_gfx9 <0x29d, "v_sub_i32">;
784
771785 defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>;
772786 defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>;
773787 defm V_INTERP_MOV_F32_e64 : VOP3Interp_Real_vi <0x272>;
5151 ; GCN-LABEL: {{^}}s_test_add_v2i16_kernarg:
5252 ; GFX9: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
5353
54 ; VI: v_add_i32
55 ; VI: v_add_i32_sdwa
54 ; VI: v_add_u32
55 ; VI: v_add_u32_sdwa
5656 define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
5757 %add = add <2 x i16> %a, %b
5858 store <2 x i16> %add, <2 x i16> addrspace(1)* %out
1212
1313 ; GCN-LABEL: {{^}}work_item_info:
1414 ; GCN-NOT: v0
15 ; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, v0, v{{[0-9]+}}
15 ; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v0, v{{[0-9]+}}
1616 ; GCN: buffer_store_dword [[RESULT]]
1717 define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
1818 entry:
22
33
44 ; GCN-LABEL: {{^}}shader_cc:
5 ; GCN: v_add_i32_e32 v0, vcc, s8, v0
5 ; GCN: v_add_{{[iu]}}32_e32 v0, vcc, s8, v0
66 define amdgpu_cs float @shader_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
77 %vi = bitcast float %v to i32
88 %x = add i32 %vi, %w
390390 ; FUNC-LABEL: ptrtoint:
391391 ; SI-NOT: ds_write
392392 ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
393 ; SI: v_add_i32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5,
393 ; SI: v_add_{{[iu]}}32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5,
394394 ; SI: buffer_load_dword v{{[0-9]+}}, [[ADD_OFFSET:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
395395 define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
396396 %alloca = alloca [16 x i32]
88 ; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
99 ; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 6, v{{[0-9]+}}
1010 ; CI: v_and_b32_e32 v[[ADDRLO:[0-9]+]], 0x3fc, v[[SHR]]
11 ; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
12 ; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
11 ; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
12 ; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
1313 ; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
1414 define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x) {
1515 %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
2828 ; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 15
2929 ; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE1:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3030 ; VI-SDWA: v_lshlrev_b64 v{{\[}}[[ADDRBASE:[0-9]+]]:{{[^\]+}}], 2, v{{\[}}[[ADDRBASE1]]:{{[^\]+}}]
31 ; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
31 ; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
3232 ; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 1, v{{[0-9]+}}
3333 ; CI: v_and_b32_e32 v[[AND:[0-9]+]], 0x7fff8000, v[[SHR]]
3434 ; CI: v_lshl_b64 v{{\[}}[[ADDRLO:[0-9]+]]:{{[^\]+}}], v{{\[}}[[AND]]:{{[^\]+}}], 2
35 ; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
35 ; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
3636 ; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
3737 define amdgpu_kernel void @bfe_combine16(i32 addrspace(1)* nocapture %arg, i32 %x) {
3838 %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
2121 ; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
2222 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
2323 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
24 ; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
24 ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
2525
2626 ; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
2727 ; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
9999 ; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
100100 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
101101 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
102 ; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
102 ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
103103
104104 ; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
105105 ; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
3737 ; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}}
3838
3939 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
40 ; GCN: v_add_i32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
40 ; GCN: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
4141 ; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
4242
4343 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
44 ; GCN: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
44 ; GCN: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
4545
4646 ; GCN: s_swappc_b64
4747
111111 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 32, [[FFBH]], vcc
112112
113113 ; SI: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, 24, [[SELECT]]
114 ; VI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]]
114 ; VI: v_add_u32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]]
115115 ; GCN: buffer_store_byte [[RESULT]],
116116 ; GCN: s_endpgm
117117 define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
150150 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
151151 ; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]]
152152 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
153 ; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
153 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
154154 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
155155 ; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], vcc
156156 ; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
125125 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
126126 ; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]]
127127 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
128 ; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
128 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
129129 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
130130 ; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
131131 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}}
187187 ; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0
188188 ; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]]
189189
190 ; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]]
190 ; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]]
191191
192192 ; GCN: buffer_store_dword [[RESULT]],
193193 ; GCN: s_endpgm
168168
169169 ; GCN-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
170170 ; GCN: {{buffer|flat}}_load_dword [[LOADREG:v[0-9]+]],
171 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
171 ; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
172172 ; GCN-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
173173 ; GCN: buffer_store_dword [[CONV]],
174174 define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
33 ; GCN-LABEL: ds_read32_combine_stride_400:
44 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
55 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
6 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
7 ; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
8 ; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
9 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
10 ; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
11 ; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
6 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
7 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
8 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
9 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
10 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
11 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
1212 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
1313 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
1414 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
4545 ; GCN-LABEL: ds_read32_combine_stride_400_back:
4646 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
4747 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
48 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
49 ; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
50 ; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
51 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
52 ; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
53 ; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
48 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
49 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
50 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
51 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
52 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
53 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
5454 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
5555 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
5656 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
123123 ; GCN-LABEL: ds_read32_combine_stride_8192_shifted:
124124 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
125125 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
126 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
127 ; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
128 ; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
129 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
130 ; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
131 ; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
126 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
127 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
128 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
129 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
130 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
131 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
132132 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32
133133 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32
134134 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32
159159 ; GCN-LABEL: ds_read64_combine_stride_400:
160160 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
161161 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
162 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
163 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
162 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
163 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
164164 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50
165165 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150
166166 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250
197197 ; GCN-LABEL: ds_read64_combine_stride_8192_shifted:
198198 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
199199 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
200 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
201 ; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
202 ; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
203 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
204 ; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
205 ; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
200 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
201 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
202 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
203 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
204 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
205 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
206206 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16
207207 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16
208208 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16
233233 ; GCN-LABEL: ds_write32_combine_stride_400:
234234 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
235235 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
236 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
237 ; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
238 ; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
239 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
240 ; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
241 ; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
236 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
237 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
238 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
239 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
240 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
241 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
242242 ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
243243 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
244244 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
266266 ; GCN-LABEL: ds_write32_combine_stride_400_back:
267267 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
268268 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
269 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
270 ; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
271 ; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
272 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
273 ; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
274 ; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
269 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
270 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
271 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
272 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
273 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
274 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
275275 ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
276276 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
277277 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
326326 ; GCN-LABEL: ds_write32_combine_stride_8192_shifted:
327327 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
328328 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
329 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
330 ; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
331 ; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
332 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
333 ; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
334 ; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
329 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
330 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
331 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
332 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
333 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
334 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
335335 ; GCN-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
336336 ; GCN-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
337337 ; GCN-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
355355 ; GCN-LABEL: ds_write64_combine_stride_400:
356356 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
357357 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
358 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
359 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
358 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
359 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
360360 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
361361 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150
362362 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250
384384 ; GCN-LABEL: ds_write64_combine_stride_8192_shifted:
385385 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
386386 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
387 ; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
388 ; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
389 ; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
390 ; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
391 ; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
392 ; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
387 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
388 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
389 ; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
390 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
391 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
392 ; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
393393 ; GCN-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
394394 ; GCN-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
395395 ; GCN-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
5252 }
5353
5454 ; GCN-LABEL: {{^}}test_global
55 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}}
55 ; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}}
5656 ; GCN: flat_store_dword
5757 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
5858 ; GCN-NEXT: s_barrier
2323
2424 ; GCN-LABEL: {{^}}void_func_i1_signext:
2525 ; GCN: s_waitcnt
26 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 12, v0
26 ; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
2727 ; GCN-NOT: v0
2828 ; GCN: buffer_store_dword v0, off
2929 define void @void_func_i1_signext(i1 signext %arg0) #0 {
5959
6060 ; GCN-LABEL: {{^}}void_func_i8_zeroext:
6161 ; GCN-NOT: and_b32
62 ; GCN: v_add_i32_e32 v0, vcc, 12, v0
62 ; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
6363 define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
6464 %ext = zext i8 %arg0 to i32
6565 %add = add i32 %ext, 12
6969
7070 ; GCN-LABEL: {{^}}void_func_i8_signext:
7171 ; GCN-NOT: v_bfe_i32
72 ; GCN: v_add_i32_e32 v0, vcc, 12, v0
72 ; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
7373 define void @void_func_i8_signext(i8 signext %arg0) #0 {
7474 %ext = sext i8 %arg0 to i32
7575 %add = add i32 %ext, 12
8686
8787 ; GCN-LABEL: {{^}}void_func_i16_zeroext:
8888 ; GCN-NOT: v0
89 ; GCN: v_add_i32_e32 v0, vcc, 12, v0
89 ; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
9090 define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
9191 %ext = zext i16 %arg0 to i32
9292 %add = add i32 %ext, 12
9696
9797 ; GCN-LABEL: {{^}}void_func_i16_signext:
9898 ; GCN-NOT: v0
99 ; GCN: v_add_i32_e32 v0, vcc, 12, v0
99 ; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
100100 define void @void_func_i16_signext(i16 signext %arg0) #0 {
101101 %ext = sext i16 %arg0 to i32
102102 %add = add i32 %ext, 12
395395
396396 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
397397
398 ; GCN-DAG: v_add_i32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
398 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
399399 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}}
400400
401 ; GCN-DAG: v_add_i32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
401 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
402402 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}}
403403
404 ; GCN-DAG: v_add_i32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
404 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
405405 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}}
406406
407 ; GCN-DAG: v_add_i32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
407 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
408408 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}}
409409
410 ; GCN-DAG: v_add_i32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
410 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
411411 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}}
412412
413 ; GCN-DAG: v_add_i32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
413 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
414414 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}}
415415
416 ; GCN-DAG: v_add_i32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
416 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
417417 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}}
418418
419 ; GCN-DAG: v_add_i32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
419 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
420420 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}}
421421
422 ; GCN-DAG: v_add_i32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
422 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
423423 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}}
424424
425 ; GCN-DAG: v_add_i32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
425 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
426426 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}}
427427
428 ; GCN-DAG: v_add_i32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
428 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
429429 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}}
430430
431 ; GCN-DAG: v_add_i32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
431 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
432432 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}}
433433
434 ; GCN-DAG: v_add_i32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
434 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
435435 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}}
436436
437 ; GCN-DAG: v_add_i32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
437 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
438438 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}}
439439
440 ; GCN-DAG: v_add_i32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
440 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
441441 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}}
442442
443 ; GCN-DAG: v_add_i32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
443 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
444444 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}}
445445
446 ; GCN-DAG: v_add_i32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
446 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
447447 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}}
448448
449 ; GCN-DAG: v_add_i32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
449 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
450450 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}}
451451
452 ; GCN-DAG: v_add_i32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
452 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
453453 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}}
454454
455 ; GCN-DAG: v_add_i32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
455 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
456456 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}}
457457
458 ; GCN-DAG: v_add_i32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
458 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
459459 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}}
460460
461 ; GCN-DAG: v_add_i32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
461 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
462462 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}}
463463
464 ; GCN-DAG: v_add_i32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
464 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
465465 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}}
466466
467 ; GCN-DAG: v_add_i32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
467 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
468468 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}}
469469
470 ; GCN-DAG: v_add_i32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
470 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
471471 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}}
472472
473 ; GCN-DAG: v_add_i32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
473 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
474474 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}}
475475
476 ; GCN-DAG: v_add_i32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
476 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
477477 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}}
478478
479 ; GCN-DAG: v_add_i32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
479 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
480480 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}}
481481
482 ; GCN-DAG: v_add_i32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
482 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
483483 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}}
484484
485 ; GCN-DAG: v_add_i32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
485 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
486486 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}}
487487
488 ; GCN-DAG: v_add_i32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
488 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
489489 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}}
490490
491 ; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
491 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
492492 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}}
493493
494494 ; GCN: buffer_load_dword v34
509509
510510 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
511511
512 ; GCN-DAG: v_add_i32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
512 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
513513 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}}
514514
515 ; GCN-DAG: v_add_i32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
515 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
516516 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}}
517517
518 ; GCN-DAG: v_add_i32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
518 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
519519 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}}
520520
521 ; GCN-DAG: v_add_i32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
521 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
522522 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}}
523523
524 ; GCN-DAG: v_add_i32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
524 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
525525 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}}
526526
527 ; GCN-DAG: v_add_i32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
527 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
528528 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}}
529529
530 ; GCN-DAG: v_add_i32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
530 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
531531 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}}
532532
533 ; GCN-DAG: v_add_i32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
533 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
534534 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}}
535535
536 ; GCN-DAG: v_add_i32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
536 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
537537 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}}
538538
539 ; GCN-DAG: v_add_i32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
539 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
540540 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}}
541541
542 ; GCN-DAG: v_add_i32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
542 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
543543 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}}
544544
545 ; GCN-DAG: v_add_i32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
545 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
546546 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}}
547547
548 ; GCN-DAG: v_add_i32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
548 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
549549 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}}
550550
551 ; GCN-DAG: v_add_i32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
551 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
552552 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}}
553553
554 ; GCN-DAG: v_add_i32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
554 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
555555 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}}
556556
557 ; GCN-DAG: v_add_i32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
557 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
558558 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}}
559559
560 ; GCN-DAG: v_add_i32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
560 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
561561 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}}
562562
563 ; GCN-DAG: v_add_i32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
563 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
564564 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}}
565565
566 ; GCN-DAG: v_add_i32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
566 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
567567 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}}
568568
569 ; GCN-DAG: v_add_i32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
569 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
570570 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}}
571571
572 ; GCN-DAG: v_add_i32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
572 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
573573 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}}
574574
575 ; GCN-DAG: v_add_i32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
575 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
576576 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}}
577577
578 ; GCN-DAG: v_add_i32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
578 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
579579 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}}
580580
581 ; GCN-DAG: v_add_i32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
581 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
582582 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}}
583583
584 ; GCN-DAG: v_add_i32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
584 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
585585 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}}
586586
587 ; GCN-DAG: v_add_i32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
587 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
588588 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}}
589589
590 ; GCN-DAG: v_add_i32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
590 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
591591 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}}
592592
593 ; GCN-DAG: v_add_i32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
593 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
594594 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}}
595595
596 ; GCN-DAG: v_add_i32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
596 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
597597 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}}
598598
599 ; GCN-DAG: v_add_i32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
599 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
600600 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}}
601601
602 ; GCN-DAG: v_add_i32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
602 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
603603 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}}
604604
605 ; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
605 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
606606 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}}
607607
608608 ; GCN: buffer_load_dword v34
622622
623623 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
624624
625 ; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
625 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
626626 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}}
627627
628628
629 ; GCN-DAG: v_add_i32_e32 [[ADD_256:v[0-9]+]], vcc, 0xfc, v0
629 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_256:v[0-9]+]], vcc, 0xfc, v0
630630 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_256]], s[0:3], s4 offen{{$}}
631631
632632 ; GCN: buffer_load_dword v33
102102 }
103103
104104 ;CHECK-LABEL: {{^}}buffer_load_negative_offset:
105 ;CHECK: v_add_i32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
105 ;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
106106 ;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen
107107 define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
108108 main_body:
5050 ; GCN: s_bfm_b64 exec, s1, 0
5151 ; GCN: s_cmp_eq_u32 s1, 64
5252 ; GCN: s_cmov_b64 exec, -1
53 ; GCN: v_add_i32_e32 v0, vcc, s0, v0
53 ; GCN: v_add_co_u32_e32 v0, vcc, s0, v0
5454 define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
5555 main_body:
5656 call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
6464 ; GCN: s_bfm_b64 exec, s1, 0
6565 ; GCN: s_cmp_eq_u32 s1, 64
6666 ; GCN: s_cmov_b64 exec, -1
67 ; GCN: v_add_i32_e32 v0, vcc, s0, v0
67 ; GCN: v_add_co_u32_e32 v0, vcc, s0, v0
6868 define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
6969 main_body:
7070 %s = add i32 %a, %count
395395 ; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
396396 ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
397397 ; GCN: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
398 ; GCN: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
398 ; GCN: v_add_{{[iu]}}32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
399399 ; GCN: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
400400 ; GCN: buffer_store_dword [[TMP2]]
401401 define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
6464
6565 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
6666 ; GCN: buffer_load_dword
67 ; GCN: v_add_i32
67 ; GCN: v_add_{{[iu]}}32
6868 ; GCN-NEXT: v_and_b32_e32
6969 ; FIXME: Should be using s_add_i32
7070 ; GCN-NOT: {{[^@]}}bfe
8080
8181 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
8282 ; GCN: buffer_load_dword
83 ; GCN: v_add_i32
83 ; GCN: v_add_{{[iu]}}32
8484 ; GCN-NEXT: v_and_b32_e32
8585 ; GCN-NOT: {{[^@]}}bfe
8686 ; GCN: s_endpgm
9595
9696 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
9797 ; GCN: buffer_load_dword
98 ; GCN: v_add_i32
98 ; GCN: v_add_{{[iu]}}32
9999 ; GCN: bfe
100100 ; GCN: s_endpgm
101101 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
109109
110110 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
111111 ; GCN: buffer_load_dword
112 ; GCN: v_add_i32
112 ; GCN: v_add_{{[iu]}}32
113113 ; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
114114 ; GCN-NEXT: bfe
115115 ; GCN: s_endpgm
124124
125125 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
126126 ; GCN: buffer_load_dword
127 ; GCN: v_add_i32
127 ; GCN: v_add_{{[iu]}}32
128128 ; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
129129 ; GCN-NEXT: bfe
130130 ; GCN: s_endpgm
139139
140140 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
141141 ; GCN: buffer_load_dword
142 ; GCN: v_add_i32
142 ; GCN: v_add_{{[iu]}}32
143143 ; GCN-NEXT: bfe
144144 ; GCN: s_endpgm
145145 define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
8686 ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]]
8787 ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]]
8888
89 ; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
89 ; GFX9: v_add_{{[_coiu]*}}32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
9090 define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
9191 %tid = call i32 @llvm.amdgcn.workitem.id.x()
9292 %tid.ext = sext i32 %tid to i64
8080 ; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]]
8181 ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]]
8282
83 ; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
83 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
8484 define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
8585 %tid = call i32 @llvm.amdgcn.workitem.id.x()
8686 %tid.ext = sext i32 %tid to i64
1919 ; BOTH-LABEL: {{^}}v_rotl_i64:
2020 ; SI-DAG: v_lshl_b64
2121 ; VI-DAG: v_lshlrev_b64
22 ; BOTH-DAG: v_sub_i32
22 ; BOTH-DAG: v_sub_{{[iu]}}32
2323 ; SI: v_lshr_b64
2424 ; VI: v_lshrrev_b64
2525 ; BOTH: v_or_b32
1616 }
1717
1818 ; BOTH-LABEL: {{^}}v_rotr_i64:
19 ; BOTH-DAG: v_sub_i32
19 ; BOTH-DAG: v_sub_{{[iu]}}32
2020 ; SI-DAG: v_lshr_b64
2121 ; SI-DAG: v_lshl_b64
2222 ; VI-DAG: v_lshrrev_b64
4848 }
4949
5050 ; FUNC-LABEL: {{^}}v_saddo_i64:
51 ; SI: v_add_i32
51 ; SI: v_add_{{[iu]}}32
5252 ; SI: v_addc_u32
5353 define amdgpu_kernel void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
5454 %a = load i64, i64 addrspace(1)* %aptr, align 4
4949 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
5050 ; This constant isn't folded, because it has multiple uses.
5151 ; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8004
52 ; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
52 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
5353 ; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
5454
5555 define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
8686 }
8787
8888 ; GCN-LABEL: {{^}}neg_vaddr_offset_inbounds:
89 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
89 ; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
9090 ; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
9191 define amdgpu_kernel void @neg_vaddr_offset_inbounds(i32 %offset) {
9292 entry:
9898 }
9999
100100 ; GCN-LABEL: {{^}}neg_vaddr_offset:
101 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
101 ; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
102102 ; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
103103 define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) {
104104 entry:
3636 ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
3737 ; SI-DAG: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
3838 ; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
39 ; SI: v_add_i32
39 ; SI: v_add_{{[iu]}}32
4040 ; SI: v_lshrrev_b32
4141 ; SI: v_ashrrev_i32
42 ; SI: v_add_i32
42 ; SI: v_add_{{[iu]}}32
4343 ; SI: buffer_store_dword
4444 ; SI: s_endpgm
4545 define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
33
44 ; GCN-LABEL: {{^}}add_shr_i32:
55 ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
6 ; NOSDWA: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
7 ; NOSDWA-NOT: v_add_i32_sdwa
8
9 ; SDWA: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
6 ; NOSDWA: v_add_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
7 ; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa
8
9 ; SDWA: v_add_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1010
1111 define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
1212 %a = load i32, i32 addrspace(1)* %in, align 4
1818
1919 ; GCN-LABEL: {{^}}sub_shr_i32:
2020 ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
21 ; NOSDWA: v_subrev_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
22 ; NOSDWA-NOT: v_subrev_i32_sdwa
23
24 ; SDWA: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
21 ; NOSDWA: v_subrev_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
22 ; NOSDWA-NOT: v_subrev_{{[_cou]*}}32_sdwa
23
24 ; SDWA: v_subrev_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2525
2626 define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
2727 %a = load i32, i32 addrspace(1)* %in, align 4
425425 }
426426
427427 ; GCN-LABEL: {{^}}add_bb_v2i16:
428 ; NOSDWA-NOT: v_add_i32_sdwa
429
430 ; VI: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
428 ; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa
429
430 ; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
431431
432432 ; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
433433
44 ; used in an REG_SEQUENCE that also needs to be handled.
55
66 ; SI-LABEL: {{^}}test_dup_operands:
7 ; SI: v_add_i32_e32
7 ; SI: v_add_{{[iu]}}32_e32
88 define amdgpu_kernel void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
99 %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
1010 %lo = extractelement <2 x i32> %a, i32 0
320320 ; CHECK: s_cmp_eq_u32
321321 ; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]
322322
323 ; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
323 ; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
324324
325325 ; [[END]]:
326326 ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
44
55 ; CHECK-LABEL: {{^}}add_const_offset:
66 ; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0
7 ; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]]
7 ; CHECK: v_add_u32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]]
88 ; CHECK-NOT: v_lshl
9 ; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]]
9 ; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]]
1010 ; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
1111 define amdgpu_kernel void @add_const_offset(i32 addrspace(1)* nocapture %arg) {
1212 bb:
2323 ; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0
2424 ; CHECK: v_or_b32_e32 v[[OR:[0-9]+]], 0x1000, v[[SHL]]
2525 ; CHECK-NOT: v_lshl
26 ; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]]
26 ; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]]
2727 ; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
2828 define amdgpu_kernel void @or_const_offset(i32 addrspace(1)* nocapture %arg) {
2929 bb:
3434 ; GCN-LABEL: {{^}}load_shl_base_lds_1:
3535 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
3636 ; GCN: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
37 ; GCN: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
37 ; GCN: v_add_{{[iu]}}32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
3838 ; GCN-DAG: buffer_store_dword [[RESULT]]
3939 ; GCN-DAG: buffer_store_dword [[ADDUSE]]
4040 ; GCN: s_endpgm
300300 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
301301 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
302302 ; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528
303 ; GCN-DAG: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 0x1fff0, [[SCALE1]]
303 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 0x1fff0, [[SCALE1]]
304304 ; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}}
305305 define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
306306 %idx.add = add nuw i32 %idx, 8191
314314 }
315315
316316 ; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_lds_offset:
317 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x1000, v0
317 ; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1000, v0
318318 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]]
319319 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
320320 ; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+$}}
352352 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
353353 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
354354 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen offset:4088
355 ; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]]
355 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]]
356356 ; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[0:3], s4 offen{{$}}
357357 define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #0 {
358358 %idx = zext i16 %idx.arg to i32
366366 ret void
367367 }
368368 ; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_private_offset:
369 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x100, v0
369 ; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x100, v0
370370 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]]
371371 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
372372 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen{{$}}
55
66 ; GCN-LABEL: {{^}}v_test_i32_x_sub_64:
77 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
8 ; GCN: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
8 ; GCN: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
99 define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
1010 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1111 %tid.ext = sext i32 %tid to i64
2020 ; GCN-LABEL: {{^}}v_test_i32_x_sub_64_multi_use:
2121 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
2222 ; GCN: {{buffer|flat}}_load_dword [[Y:v[0-9]+]]
23 ; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
24 ; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
23 ; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
24 ; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
2525 define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
2626 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2727 %tid.ext = sext i32 %tid to i64
3838
3939 ; GCN-LABEL: {{^}}v_test_i32_64_sub_x:
4040 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
41 ; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
41 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
4242 define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
4343 %tid = call i32 @llvm.amdgcn.workitem.id.x()
4444 %tid.ext = sext i32 %tid to i64
5252
5353 ; GCN-LABEL: {{^}}v_test_i32_x_sub_65:
5454 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
55 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
55 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
5656 define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
5757 %tid = call i32 @llvm.amdgcn.workitem.id.x()
5858 %tid.ext = sext i32 %tid to i64
6666
6767 ; GCN-LABEL: {{^}}v_test_i32_65_sub_x:
6868 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
69 ; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
69 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
7070 define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
7171 %tid = call i32 @llvm.amdgcn.workitem.id.x()
7272 %tid.ext = sext i32 %tid to i64
8080
8181 ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg16:
8282 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
83 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
83 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
8484 define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
8585 %tid = call i32 @llvm.amdgcn.workitem.id.x()
8686 %tid.ext = sext i32 %tid to i64
9494
9595 ; GCN-LABEL: {{^}}v_test_i32_neg16_sub_x:
9696 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
97 ; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
97 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
9898 define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
9999 %tid = call i32 @llvm.amdgcn.workitem.id.x()
100100 %tid.ext = sext i32 %tid to i64
108108
109109 ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg17:
110110 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
111 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
111 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
112112 define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
113113 %tid = call i32 @llvm.amdgcn.workitem.id.x()
114114 %tid.ext = sext i32 %tid to i64
122122
123123 ; GCN-LABEL: {{^}}v_test_i32_neg17_sub_x:
124124 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
125 ; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
125 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
126126 define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
127127 %tid = call i32 @llvm.amdgcn.workitem.id.x()
128128 %tid.ext = sext i32 %tid to i64
33
44 ; GCN-LABEL: {{^}}i32_fastcc_i32_i32:
55 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6 ; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0
6 ; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
77 ; GCN-NEXT: s_setpc_b64
88 define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
99 %add0 = add i32 %arg0, %arg1
1212
1313 ; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
1414 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15 ; GCN: v_add_i32_e32 v0, vcc, v1, v
15 ; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v
1616 ; GCN: s_mov_b32 s5, s32
1717 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
1818 ; GCN: s_waitcnt vmcnt(0)
8282 ; GCN-NEXT: s_mov_b32 s5, s32
8383 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
8484 ; GCN-NEXT: s_waitcnt vmcnt(0)
85 ; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0
85 ; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
8686 ; GCN-NEXT: s_setpc_b64 s[30:31]
8787 define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32* byval align 4 %arg1) #1 {
8888 %arg1.load = load i32, i32* %arg1, align 4
121121 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122122 ; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s5 offset:4
123123 ; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s5 offset:8
124 ; GCN-DAG: v_add_i32_e32 v0, vcc, v1, v0
125 ; GCN: v_add_i32_e32 v0, vcc, [[LOAD_0]], v0
126 ; GCN: v_add_i32_e32 v0, vcc, [[LOAD_1]], v0
124 ; GCN-DAG: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
125 ; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_0]], v0
126 ; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_1]], v0
127127 ; GCN-NEXT: s_setpc_b64
128128 define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 {
129129 %val_firststack = extractvalue [32 x i32] %large, 30
1616 }
1717
1818 ; FUNC-LABEL: {{^}}v_abs_i32:
19 ; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
19 ; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
2020 ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
21 ; GCN: v_add_i32
21 ; GCN: v_add_{{[iu]}}32
2222
2323 ; EG: MAX_INT
2424 define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
3232 }
3333
3434 ; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
35 ; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
35 ; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
3636 ; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
3737 ; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MAX]], [[MAX]]
3838 define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
6767 }
6868
6969 ; FUNC-LABEL: {{^}}v_abs_v2i32:
70 ; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
71 ; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
70 ; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
71 ; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
7272
7373 ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
7474 ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
7575
76 ; GCN: v_add_i32
77 ; GCN: v_add_i32
76 ; GCN: v_add_{{[iu]}}32
77 ; GCN: v_add_{{[iu]}}32
7878
7979 ; EG: MAX_INT
8080 ; EG: MAX_INT
126126 }
127127
128128 ; FUNC-LABEL: {{^}}v_abs_v4i32:
129 ; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
130 ; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
131 ; GCN-DAG: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
132 ; GCN-DAG: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
129 ; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
130 ; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
131 ; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
132 ; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
133133
134134 ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
135135 ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
136136 ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
137137 ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
138138
139 ; GCN: v_add_i32
140 ; GCN: v_add_i32
141 ; GCN: v_add_i32
142 ; GCN: v_add_i32
139 ; GCN: v_add_{{[iu]}}32
140 ; GCN: v_add_{{[iu]}}32
141 ; GCN: v_add_{{[iu]}}32
142 ; GCN: v_add_{{[iu]}}32
143143
144144 ; EG: MAX_INT
145145 ; EG: MAX_INT
77 ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
88 ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
99
10 ; VI: v_sub_i32_e32
11 ; VI-DAG: v_sub_i32_e32
10 ; VI: v_sub_u32_e32
11 ; VI-DAG: v_sub_u32_e32
1212 ; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
1313 ; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
14 ; VI: v_add_i32_e32
15 ; VI: v_add_i32_e32
14 ; VI: v_add_u32_e32
15 ; VI: v_add_u32_e32
1616 ; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1717
1818 ; CI: v_sub_i32_e32
203203
204204 ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large:
205205 ; GCN-NEXT: BB#
206 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
206 ; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, 0x1000, v0
207207 ; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
208208 define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 {
209209 main_body:
2121 ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493
2222 ; SI: v_mul_hi_i32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]]
2323 ; SI: v_mul_lo_i32
24 ; SI: v_sub_i32
24 ; SI: v_sub_{{[iu]}}32
2525 ; SI: s_endpgm
2626 define amdgpu_kernel void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
2727 %num = load i32, i32 addrspace(1) * %in
5050 }
5151
5252 ; FUNC-LABEL: {{^}}v_ssubo_i64:
53 ; SI: v_sub_i32_e32
53 ; SI: v_sub_{{[iu]}}32_e32
5454 ; SI: v_subb_u32_e32
5555 define amdgpu_kernel void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
5656 %a = load i64, i64 addrspace(1)* %aptr, align 4
9797 ; GCN: s_waitcnt
9898 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094
9999
100 ; VI-DAG: v_add_i32_e32
100 ; VI-DAG: v_add_u32_e32
101101 ; VI-DAG: v_addc_u32_e32
102102 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
103103
118118 ; GCN: s_waitcnt
119119 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
120120
121 ; VI-DAG: v_add_i32_e32
121 ; VI-DAG: v_add_u32_e32
122122 ; VI-DAG: v_addc_u32_e32
123123 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
124124
138138 ; GCN: s_waitcnt
139139 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
140140
141 ; VI-DAG: v_add_i32_e32
141 ; VI-DAG: v_add_u32_e32
142142 ; VI-DAG: v_addc_u32_e32
143143 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
144144 ; VI: flat_store_byte v[0:1], v{{[0-9]$}}
159159 ; GCN: s_waitcnt
160160 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
161161
162 ; VI-DAG: v_add_i32_e32
162 ; VI-DAG: v_add_u32_e32
163163 ; VI-DAG: v_addc_u32_e32
164164 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
165165
271271 ; GCN: s_waitcnt
272272 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
273273
274 ; VI-DAG: v_add_i32_e32
274 ; VI-DAG: v_add_u32_e32
275275 ; VI-DAG: v_addc_u32_e32
276276 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
277277 ; VI: flat_store_short v[0:1], v2{{$}}
288288
289289 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
290290 ; GCN: s_waitcnt
291 ; GCN: v_add_i32_e32
292 ; GCN: v_addc_u32_e32
291 ; GCN: v_add_{{[_cou]*}}32_e32
292 ; VI: v_addc_u32_e32
293 ; GFX9: v_addc_co_u32_e32
293294
294295 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
295296 ; VI: flat_store_short v[0:1], v2{{$}}
309310 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
310311
311312 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
312 ; VI-DAG: v_add_i32_e32
313 ; VI-DAG: v_add_u32_e32
313314 ; VI-DAG: v_addc_u32_e32
314315 ; VI: flat_store_byte v[0:1], v2{{$}}
315316 ; GCN-NEXT: s_waitcnt
326327
327328 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
328329 ; GCN: s_waitcnt
329 ; GCN-DAG: v_add_i32_e32
330 ; GCN-DAG: v_addc_u32_e32
330 ; GCN-DAG: v_add_{{[_cou]*}}32_e32
331 ; VI-DAG: v_addc_u32_e32
332 ; GFX9-DAG: v_addc_co_u32_e32
331333
332334 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
333335 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
4848 ; GCN-LABEL: {{^}}s_test_sub_v2i16_kernarg:
4949 ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
5050
51 ; VI: v_subrev_i32_e32
52 ; VI: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
51 ; VI: v_subrev_u32_e32
52 ; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
5353 define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
5454 %add = sub <2 x i16> %a, %b
5555 store <2 x i16> %add, <2 x i16> addrspace(1)* %out
2121 ; FIXME: Could do scalar
2222
2323 ; FUNC-LABEL: {{^}}s_uaddo_i32:
24 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
24 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
2525 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
2626
2727 ; EG: ADDC_UINT
3636 }
3737
3838 ; FUNC-LABEL: {{^}}v_uaddo_i32:
39 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
39 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
4040 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
4141
4242 ; EG: ADDC_UINT
5757 }
5858
5959 ; FUNC-LABEL: {{^}}v_uaddo_i32_novcc:
60 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
60 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
6161 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
6262
6363 ; EG: ADDC_UINT
9494 }
9595
9696 ; FUNC-LABEL: {{^}}v_uaddo_i64:
97 ; GCN: v_add_i32
97 ; GCN: v_add_{{[iu]}}32
9898 ; GCN: v_addc_u32
9999
100100 ; EG: ADDC_UINT
2929 ; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
3030 ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
3131 ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
32 ; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
32 ; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
3333 ; SI: v_cndmask_b32_e64
3434 ; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
35 ; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
36 ; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
35 ; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
36 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
3737 ; SI: v_cndmask_b32_e64
3838 ; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
3939 ; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
40 ; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
41 ; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
42 ; SI-DAG: v_cndmask_b32_e64
43 ; SI-DAG: v_cndmask_b32_e64
44 ; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
45 ; SI-DAG: v_subrev_i32_e32 [[Remainder_S_Den:v[0-9]+]],
40 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
41 ; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
42 ; SI-DAG: v_cndmask_b32_e64
43 ; SI-DAG: v_cndmask_b32_e64
44 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]],
45 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]],
4646 ; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
4747 ; SI-DAG: v_cndmask_b32_e64
4848 ; SI-DAG: v_cndmask_b32_e64
49 ; SI-DAG: v_add_i32_e32 [[Remainder_A_Den:v[0-9]+]],
49 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]],
5050 ; SI-DAG: v_cndmask_b32_e64
5151 ; SI-DAG: v_cndmask_b32_e64
5252 ; SI: s_endpgm
113113 ; SI-DAG: v_rcp_iflag_f32_e32
114114 ; SI-DAG: v_mul_hi_u32
115115 ; SI-DAG: v_mul_lo_i32
116 ; SI-DAG: v_sub_i32_e32
117 ; SI-DAG: v_cndmask_b32_e64
118 ; SI-DAG: v_mul_hi_u32
119 ; SI-DAG: v_add_i32_e32
120 ; SI-DAG: v_subrev_i32_e32
121 ; SI-DAG: v_cndmask_b32_e64
122 ; SI-DAG: v_mul_hi_u32
123 ; SI-DAG: v_mul_lo_i32
124 ; SI-DAG: v_subrev_i32_e32
125 ; SI-DAG: v_cndmask_b32_e64
126 ; SI-DAG: v_cndmask_b32_e64
127 ; SI-DAG: v_and_b32_e32
128 ; SI-DAG: v_add_i32_e32
129 ; SI-DAG: v_subrev_i32_e32
130 ; SI-DAG: v_cndmask_b32_e64
131 ; SI-DAG: v_cndmask_b32_e64
132 ; SI-DAG: v_add_i32_e32
133 ; SI-DAG: v_subrev_i32_e32
134 ; SI-DAG: v_cndmask_b32_e64
135 ; SI-DAG: v_cndmask_b32_e64
136 ; SI-DAG: v_rcp_iflag_f32_e32
137 ; SI-DAG: v_mul_hi_u32
138 ; SI-DAG: v_mul_lo_i32
139 ; SI-DAG: v_sub_i32_e32
140 ; SI-DAG: v_cndmask_b32_e64
141 ; SI-DAG: v_mul_hi_u32
142 ; SI-DAG: v_add_i32_e32
143 ; SI-DAG: v_subrev_i32_e32
144 ; SI-DAG: v_cndmask_b32_e64
145 ; SI-DAG: v_mul_hi_u32
146 ; SI-DAG: v_mul_lo_i32
147 ; SI-DAG: v_subrev_i32_e32
148 ; SI-DAG: v_cndmask_b32_e64
149 ; SI-DAG: v_cndmask_b32_e64
150 ; SI-DAG: v_and_b32_e32
151 ; SI-DAG: v_add_i32_e32
152 ; SI-DAG: v_subrev_i32_e32
153 ; SI-DAG: v_cndmask_b32_e64
154 ; SI-DAG: v_cndmask_b32_e64
155 ; SI-DAG: v_add_i32_e32
156 ; SI-DAG: v_subrev_i32_e32
116 ; SI-DAG: v_sub_{{[iu]}}32_e32
117 ; SI-DAG: v_cndmask_b32_e64
118 ; SI-DAG: v_mul_hi_u32
119 ; SI-DAG: v_add_{{[iu]}}32_e32
120 ; SI-DAG: v_subrev_{{[iu]}}32_e32
121 ; SI-DAG: v_cndmask_b32_e64
122 ; SI-DAG: v_mul_hi_u32
123 ; SI-DAG: v_mul_lo_i32
124 ; SI-DAG: v_subrev_{{[iu]}}32_e32
125 ; SI-DAG: v_cndmask_b32_e64
126 ; SI-DAG: v_cndmask_b32_e64
127 ; SI-DAG: v_and_b32_e32
128 ; SI-DAG: v_add_{{[iu]}}32_e32
129 ; SI-DAG: v_subrev_{{[iu]}}32_e32
130 ; SI-DAG: v_cndmask_b32_e64
131 ; SI-DAG: v_cndmask_b32_e64
132 ; SI-DAG: v_add_{{[iu]}}32_e32
133 ; SI-DAG: v_subrev_{{[iu]}}32_e32
134 ; SI-DAG: v_cndmask_b32_e64
135 ; SI-DAG: v_cndmask_b32_e64
136 ; SI-DAG: v_rcp_iflag_f32_e32
137 ; SI-DAG: v_mul_hi_u32
138 ; SI-DAG: v_mul_lo_i32
139 ; SI-DAG: v_sub_{{[iu]}}32_e32
140 ; SI-DAG: v_cndmask_b32_e64
141 ; SI-DAG: v_mul_hi_u32
142 ; SI-DAG: v_add_{{[iu]}}32_e32
143 ; SI-DAG: v_subrev_{{[iu]}}32_e32
144 ; SI-DAG: v_cndmask_b32_e64
145 ; SI-DAG: v_mul_hi_u32
146 ; SI-DAG: v_mul_lo_i32
147 ; SI-DAG: v_subrev_{{[iu]}}32_e32
148 ; SI-DAG: v_cndmask_b32_e64
149 ; SI-DAG: v_cndmask_b32_e64
150 ; SI-DAG: v_and_b32_e32
151 ; SI-DAG: v_add_{{[iu]}}32_e32
152 ; SI-DAG: v_subrev_{{[iu]}}32_e32
153 ; SI-DAG: v_cndmask_b32_e64
154 ; SI-DAG: v_cndmask_b32_e64
155 ; SI-DAG: v_add_{{[iu]}}32_e32
156 ; SI-DAG: v_subrev_{{[iu]}}32_e32
157157 ; SI-DAG: v_cndmask_b32_e64
158158 ; SI-DAG: v_cndmask_b32_e64
159159 ; SI: s_endpgm
263263 ; SI-DAG: v_rcp_iflag_f32_e32
264264 ; SI-DAG: v_mul_hi_u32
265265 ; SI-DAG: v_mul_lo_i32
266 ; SI-DAG: v_sub_i32_e32
267 ; SI-DAG: v_cndmask_b32_e64
268 ; SI-DAG: v_mul_hi_u32
269 ; SI-DAG: v_add_i32_e32
270 ; SI-DAG: v_subrev_i32_e32
271 ; SI-DAG: v_cndmask_b32_e64
272 ; SI-DAG: v_mul_hi_u32
273 ; SI-DAG: v_mul_lo_i32
274 ; SI-DAG: v_subrev_i32_e32
275 ; SI-DAG: v_cndmask_b32_e64
276 ; SI-DAG: v_cndmask_b32_e64
277 ; SI-DAG: v_and_b32_e32
278 ; SI-DAG: v_add_i32_e32
279 ; SI-DAG: v_subrev_i32_e32
280 ; SI-DAG: v_cndmask_b32_e64
281 ; SI-DAG: v_cndmask_b32_e64
282 ; SI-DAG: v_add_i32_e32
283 ; SI-DAG: v_subrev_i32_e32
284 ; SI-DAG: v_cndmask_b32_e64
285 ; SI-DAG: v_cndmask_b32_e64
286 ; SI-DAG: v_rcp_iflag_f32_e32
287 ; SI-DAG: v_mul_hi_u32
288 ; SI-DAG: v_mul_lo_i32
289 ; SI-DAG: v_sub_i32_e32
290 ; SI-DAG: v_cndmask_b32_e64
291 ; SI-DAG: v_mul_hi_u32
292 ; SI-DAG: v_add_i32_e32
293 ; SI-DAG: v_subrev_i32_e32
294 ; SI-DAG: v_cndmask_b32_e64
295 ; SI-DAG: v_mul_hi_u32
296 ; SI-DAG: v_mul_lo_i32
297 ; SI-DAG: v_subrev_i32_e32
298 ; SI-DAG: v_cndmask_b32_e64
299 ; SI-DAG: v_cndmask_b32_e64
300 ; SI-DAG: v_and_b32_e32
301 ; SI-DAG: v_add_i32_e32
302 ; SI-DAG: v_subrev_i32_e32
303 ; SI-DAG: v_cndmask_b32_e64
304 ; SI-DAG: v_cndmask_b32_e64
305 ; SI-DAG: v_add_i32_e32
306 ; SI-DAG: v_subrev_i32_e32
307 ; SI-DAG: v_cndmask_b32_e64
308 ; SI-DAG: v_cndmask_b32_e64
309 ; SI-DAG: v_rcp_iflag_f32_e32
310 ; SI-DAG: v_mul_hi_u32
311 ; SI-DAG: v_mul_lo_i32
312 ; SI-DAG: v_sub_i32_e32
313 ; SI-DAG: v_cndmask_b32_e64
314 ; SI-DAG: v_mul_hi_u32
315 ; SI-DAG: v_add_i32_e32
316 ; SI-DAG: v_subrev_i32_e32
317 ; SI-DAG: v_cndmask_b32_e64
318 ; SI-DAG: v_mul_hi_u32
319 ; SI-DAG: v_mul_lo_i32
320 ; SI-DAG: v_subrev_i32_e32
321 ; SI-DAG: v_cndmask_b32_e64
322 ; SI-DAG: v_cndmask_b32_e64
323 ; SI-DAG: v_and_b32_e32
324 ; SI-DAG: v_add_i32_e32
325 ; SI-DAG: v_subrev_i32_e32
326 ; SI-DAG: v_cndmask_b32_e64
327 ; SI-DAG: v_cndmask_b32_e64
328 ; SI-DAG: v_add_i32_e32
329 ; SI-DAG: v_subrev_i32_e32
330 ; SI-DAG: v_cndmask_b32_e64
331 ; SI-DAG: v_cndmask_b32_e64
332 ; SI-DAG: v_rcp_iflag_f32_e32
333 ; SI-DAG: v_mul_hi_u32
334 ; SI-DAG: v_mul_lo_i32
335 ; SI-DAG: v_sub_i32_e32
336 ; SI-DAG: v_cndmask_b32_e64
337 ; SI-DAG: v_mul_hi_u32
338 ; SI-DAG: v_add_i32_e32
339 ; SI-DAG: v_subrev_i32_e32
266 ; SI-DAG: v_sub_{{[iu]}}32_e32
267 ; SI-DAG: v_cndmask_b32_e64
268 ; SI-DAG: v_mul_hi_u32
269 ; SI-DAG: v_add_{{[iu]}}32_e32
270 ; SI-DAG: v_subrev_{{[iu]}}32_e32
271 ; SI-DAG: v_cndmask_b32_e64
272 ; SI-DAG: v_mul_hi_u32
273 ; SI-DAG: v_mul_lo_i32
274 ; SI-DAG: v_subrev_{{[iu]}}32_e32
275 ; SI-DAG: v_cndmask_b32_e64
276 ; SI-DAG: v_cndmask_b32_e64
277 ; SI-DAG: v_and_b32_e32
278 ; SI-DAG: v_add_{{[iu]}}32_e32
279 ; SI-DAG: v_subrev_{{[iu]}}32_e32
280 ; SI-DAG: v_cndmask_b32_e64
281 ; SI-DAG: v_cndmask_b32_e64
282 ; SI-DAG: v_add_{{[iu]}}32_e32
283 ; SI-DAG: v_subrev_{{[iu]}}32_e32
284 ; SI-DAG: v_cndmask_b32_e64
285 ; SI-DAG: v_cndmask_b32_e64
286 ; SI-DAG: v_rcp_iflag_f32_e32
287 ; SI-DAG: v_mul_hi_u32
288 ; SI-DAG: v_mul_lo_i32
289 ; SI-DAG: v_sub_{{[iu]}}32_e32
290 ; SI-DAG: v_cndmask_b32_e64
291 ; SI-DAG: v_mul_hi_u32
292 ; SI-DAG: v_add_{{[iu]}}32_e32
293 ; SI-DAG: v_subrev_{{[iu]}}32_e32
294 ; SI-DAG: v_cndmask_b32_e64
295 ; SI-DAG: v_mul_hi_u32
296 ; SI-DAG: v_mul_lo_i32
297 ; SI-DAG: v_subrev_{{[iu]}}32_e32
298 ; SI-DAG: v_cndmask_b32_e64
299 ; SI-DAG: v_cndmask_b32_e64
300 ; SI-DAG: v_and_b32_e32
301 ; SI-DAG: v_add_{{[iu]}}32_e32
302 ; SI-DAG: v_subrev_{{[iu]}}32_e32
303 ; SI-DAG: v_cndmask_b32_e64
304 ; SI-DAG: v_cndmask_b32_e64
305 ; SI-DAG: v_add_{{[iu]}}32_e32
306 ; SI-DAG: v_subrev_{{[iu]}}32_e32
307 ; SI-DAG: v_cndmask_b32_e64
308 ; SI-DAG: v_cndmask_b32_e64
309 ; SI-DAG: v_rcp_iflag_f32_e32
310 ; SI-DAG: v_mul_hi_u32
311 ; SI-DAG: v_mul_lo_i32
312 ; SI-DAG: v_sub_{{[iu]}}32_e32
313 ; SI-DAG: v_cndmask_b32_e64
314 ; SI-DAG: v_mul_hi_u32
315 ; SI-DAG: v_add_{{[iu]}}32_e32
316 ; SI-DAG: v_subrev_{{[iu]}}32_e32
317 ; SI-DAG: v_cndmask_b32_e64
318 ; SI-DAG: v_mul_hi_u32
319 ; SI-DAG: v_mul_lo_i32
320 ; SI-DAG: v_subrev_{{[iu]}}32_e32
321 ; SI-DAG: v_cndmask_b32_e64
322 ; SI-DAG: v_cndmask_b32_e64
323 ; SI-DAG: v_and_b32_e32
324 ; SI-DAG: v_add_{{[iu]}}32_e32
325 ; SI-DAG: v_subrev_{{[iu]}}32_e32
326 ; SI-DAG: v_cndmask_b32_e64
327 ; SI-DAG: v_cndmask_b32_e64
328 ; SI-DAG: v_add_{{[iu]}}32_e32
329 ; SI-DAG: v_subrev_{{[iu]}}32_e32
330 ; SI-DAG: v_cndmask_b32_e64
331 ; SI-DAG: v_cndmask_b32_e64
332 ; SI-DAG: v_rcp_iflag_f32_e32
333 ; SI-DAG: v_mul_hi_u32
334 ; SI-DAG: v_mul_lo_i32
335 ; SI-DAG: v_sub_{{[iu]}}32_e32
336 ; SI-DAG: v_cndmask_b32_e64
337 ; SI-DAG: v_mul_hi_u32
338 ; SI-DAG: v_add_{{[iu]}}32_e32
339 ; SI-DAG: v_subrev_{{[iu]}}32_e32
340340 ; SI-DAG: v_cndmask_b32_e64
341341 ; SI: s_endpgm
342342 define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
2020 ; GCN-DAG: v_cmp_eq_u64
2121 ; GCN-DAG: v_cmp_gt_u64
2222
23 ; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
23 ; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]]
2424 ; GCN: v_cvt_f16_f32_e32 [[VR_F16:v[0-9]+]], [[VR]]
2525 ; GCN: {{buffer|flat}}_store_short {{.*}}[[VR_F16]]
2626 define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
5151 ; GCN-DAG: v_cmp_eq_u64
5252 ; GCN-DAG: v_cmp_gt_u64
5353
54 ; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
54 ; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]]
5555 ; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]]
5656 define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
5757 %tid = call i32 @llvm.r600.read.tidig.x()
559559 }
560560
561561 ; GCN-LABEL: {{^}}move_to_valu_vgpr_operand_phi:
562 ; GCN: v_add_i32_e32
562 ; GCN: v_add_{{[iu]}}32_e32
563563 ; GCN: ds_write_b32
564564 define void @move_to_valu_vgpr_operand_phi(i32 addrspace(3)* %out) {
565565 bb0:
2020 ; FUNC-LABEL: {{^}}test_urem_i32_7:
2121 ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925
2222 ; SI: v_mul_hi_u32 [[MAGIC]], {{v[0-9]+}}
23 ; SI: v_subrev_i32
23 ; SI: v_subrev_{{[iu]}}32
2424 ; SI: v_mul_lo_i32
25 ; SI: v_sub_i32
25 ; SI: v_sub_{{[iu]}}32
2626 ; SI: buffer_store_dword
2727 ; SI: s_endpgm
2828 define amdgpu_kernel void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
2121 ; FIXME: Could do scalar
2222
2323 ; FUNC-LABEL: {{^}}s_usubo_i32:
24 ; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
24 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
2525 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
2626
2727 ; EG-DAG: SUBB_UINT
3636 }
3737
3838 ; FUNC-LABEL: {{^}}v_usubo_i32:
39 ; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
39 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
4040 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
4141
4242 ; EG-DAG: SUBB_UINT
5757 }
5858
5959 ; FUNC-LABEL: {{^}}v_usubo_i32_novcc:
60 ; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
60 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
6161 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
6262
6363 ; EG-DAG: SUBB_UINT
9696 }
9797
9898 ; FUNC-LABEL: {{^}}v_usubo_i64:
99 ; GCN: v_sub_i32
99 ; GCN: v_sub_{{[iu]}}32
100100 ; GCN: v_subb_u32
101101
102102 ; EG-DAG: SUBB_UINT
33 ; Test that we correctly commute a sub instruction
44 ; FUNC-LABEL: {{^}}sub_rev:
55 ; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s
6 ; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s
6 ; SI: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s
77
88 ; ModuleID = 'vop-shrink.ll'
99
168168 ;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
169169 ;CHECK: buffer_load_dword
170170 ;CHECK: buffer_load_dword
171 ;CHECK: v_add_i32_e32
171 ;CHECK: v_add_{{[iu]}}32_e32
172172 define amdgpu_ps float @test_wwm2(i32 inreg %idx0, i32 inreg %idx1) {
173173 main_body:
174174 %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
302302 ;CHECK: v_mov_b32_e32
303303 ;CHECK: s_not_b64 exec, exec
304304 ;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
305 ;CHECK: v_add_i32_e32
305 ;CHECK: v_add_{{[iu]}}32_e32
306306 define amdgpu_ps void @test_set_inactive1(i32 inreg %idx) {
307307 main_body:
308308 %src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
0 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX9 %s
1 // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefixes=GCN,VI %s
0 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GFX9 %s
21
32 // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s
43 // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s
54 // FIXME: pre-gfx9 errors should be more useful
65
76
8 // FIXME: These should parse to VOP2 encoding
97 v_add_u32 v1, v2, v3
10 // GFX9: v_add_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x07,0x02,0x00]
11 // ERR-SICIVI: :15: error: invalid operand for instruction
8 // GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68]
9 // ERR-SICIVI: error: instruction not supported on this GPU
1210
1311 v_add_u32 v1, v2, s1
1412 // GFX9: v_add_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x03,0x00,0x00]
15 // ERR-SICIVI: :15: error: invalid operand for instruction
13 // ERR-SICIVI: error: instruction not supported on this GPU
1614
1715 v_add_u32 v1, s1, v2
18 // GFX9: v_add_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x34,0xd1,0x01,0x04,0x02,0x00]
19 // ERR-SICIVI: :15: error: invalid operand for instruction
16 // GFX9: v_add_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x68]
17 // ERR-SICIVI: error: instruction not supported on this GPU
2018
2119 v_add_u32 v1, 4.0, v2
22 // GFX9: v_add_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x34,0xd1,0xf6,0x04,0x02,0x00]
23 // ERR-SICIVI: :15: error: invalid operand for instruction
20 // GFX9: v_add_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x68]
21 // ERR-SICIVI: error: instruction not supported on this GPU
2422
2523 v_add_u32 v1, v2, 4.0
2624 // GFX9: v_add_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0xed,0x01,0x00]
27 // ERR-SICIVI: :15: error: invalid operand for instruction
25 // ERR-SICIVI: error: instruction not supported on this GPU
2826
2927 v_add_u32_e32 v1, v2, v3
3028 // GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68]
31 // ERR-SICIVI: :19: error: invalid operand for instruction
29 // ERR-SICIVI: error: instruction not supported on this GPU
3230
3331 v_add_u32_e32 v1, s1, v3
3432 // GFX9: v_add_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x68]
35 // ERR-SICIVI: :19: error: invalid operand for instruction
33 // ERR-SICIVI: error: instruction not supported on this GPU
3634
3735
3836
3937 v_sub_u32 v1, v2, v3
40 // GFX9: v_sub_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x07,0x02,0x00]
41 // ERR-SICIVI: :15: error: invalid operand for instruction
38 // GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a]
39 // ERR-SICIVI: error: instruction not supported on this GPU
4240
4341 v_sub_u32 v1, v2, s1
4442 // GFX9: v_sub_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x03,0x00,0x00]
45 // ERR-SICIVI: :15: error: invalid operand for instruction
43 // ERR-SICIVI: error: instruction not supported on this GPU
4644
4745 v_sub_u32 v1, s1, v2
48 // GFX9: v_sub_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x35,0xd1,0x01,0x04,0x02,0x00]
49 // ERR-SICIVI: :15: error: invalid operand for instruction
46 // GFX9: v_sub_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x6a]
47 // ERR-SICIVI: error: instruction not supported on this GPU
5048
5149 v_sub_u32 v1, 4.0, v2
52 // GFX9: v_sub_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x35,0xd1,0xf6,0x04,0x02,0x00]
53 // ERR-SICIVI: :15: error: invalid operand for instruction
50 // GFX9: v_sub_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x6a]
51 // ERR-SICIVI: error: instruction not supported on this GPU
5452
5553 v_sub_u32 v1, v2, 4.0
5654 // GFX9: v_sub_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0xed,0x01,0x00]
57 // ERR-SICIVI: :15: error: invalid operand for instruction
55 // ERR-SICIVI: error: instruction not supported on this GPU
5856
5957 v_sub_u32_e32 v1, v2, v3
6058 // GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a]
61 // ERR-SICIVI: :19: error: invalid operand for instruction
59 // ERR-SICIVI: error: instruction not supported on this GPU
6260
6361 v_sub_u32_e32 v1, s1, v3
6462 // GFX9: v_sub_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6a]
65 // ERR-SICIVI: :19: error: invalid operand for instruction
63 // ERR-SICIVI: error: instruction not supported on this GPU
6664
6765
6866
6967 v_subrev_u32 v1, v2, v3
70 // GFX9: v_subrev_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x07,0x02,0x00]
71 // ERR-SICIVI: :18: error: invalid operand for instruction
68 // GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c]
69 // ERR-SICIVI: error: instruction not supported on this GPU
7270
7371 v_subrev_u32 v1, v2, s1
7472 // GFX9: v_subrev_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x03,0x00,0x00]
75 // ERR-SICIVI: :18: error: invalid operand for instruction
73 // ERR-SICIVI: error: instruction not supported on this GPU
7674
7775 v_subrev_u32 v1, s1, v2
78 // GFX9: v_subrev_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x36,0xd1,0x01,0x04,0x02,0x00]
79 // ERR-SICIVI: :18: error: invalid operand for instruction
76 // GFX9: v_subrev_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x6c]
77 // ERR-SICIVI: error: instruction not supported on this GPU
8078
8179 v_subrev_u32 v1, 4.0, v2
82 // GFX9: v_subrev_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x36,0xd1,0xf6,0x04,0x02,0x00]
83 // ERR-SICIVI: :18: error: invalid operand for instruction
80 // GFX9: v_subrev_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x6c]
81 // ERR-SICIVI: error: instruction not supported on this GPU
8482
8583 v_subrev_u32 v1, v2, 4.0
8684 // GFX9: v_subrev_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0xed,0x01,0x00]
87 // ERR-SICIVI: :18: error: invalid operand for instruction
85 // ERR-SICIVI: error: instruction not supported on this GPU
8886
8987 v_subrev_u32_e32 v1, v2, v3
9088 // GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c]
91 // ERR-SICIVI: :22: error: invalid operand for instruction
89 // ERR-SICIVI: error: instruction not supported on this GPU
9290
9391 v_subrev_u32_e32 v1, s1, v3
9492 // GFX9: v_subrev_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6c]
95 // ERR-SICIVI: :22: error: invalid operand for instruction
96
97
98
99 v_add_u32 v1, vcc, v2, v3
100 // GCN: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
101
102 v_add_u32 v1, s[0:1], v2, v3
103 // GCN: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
93 // ERR-SICIVI: error: instruction not supported on this GPU
2323
2424 v_mov_b32_e32 v0, 3.125
2525 // GCN: v_mov_b32_e32 v0, 0x40480000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x00,0x48,0x40]
26
27 v_add_i32 v0, vcc, 0.5, v0
28 // SICI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x4a]
29 // VI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x32]
30
31 v_add_i32 v0, vcc, 3.125, v0
32 // SICI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x4a,0x00,0x00,0x48,0x40]
33 // VI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x32,0x00,0x00,0x48,0x40]
9494 v_mul_i32_i24_e64 v1, 3, s3
9595
9696 // SICI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x4a]
97 // VI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x32]
97 // NOVI: error: instruction not supported on this GPU
9898 v_add_i32_e32 v0, vcc, 0.5, v0
9999
100100 // SICI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x4a,0x00,0x00,0x48,0x40]
101 // VI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x32,0x00,0x00,0x48,0x40]
101 // NOVI: error: instruction not supported on this GPU
102102 v_add_i32_e32 v0, vcc, 3.125, v0
103103
104104 //===----------------------------------------------------------------------===//
270270 v_mbcnt_hi_u32_b32_e64 v1, v2, v3
271271
272272 // SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
273 // VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
273 // NOVI: error: instruction not supported on this GPU
274274 v_add_i32_e32 v1, vcc, v2, v3
275275
276276 // SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
277 // VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
277 // NOVI: error: instruction not supported on this GPU
278278 v_add_i32 v1, s[0:1], v2, v3
279279
280280 // SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
281 // VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
281 // NOVI: error: instruction not supported on this GPU
282282 v_add_i32_e64 v1, s[0:1], v2, v3
283283
284284 // SICI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x4a,0xd2,0x02,0x07,0x02,0x00]
285 // VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
285 // NOVI: error: instruction not supported on this GPU
286286 v_add_i32_e64 v1, vcc, v2, v3
287287
288 // SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
289 // VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
288 // NOSICI: error: instruction not supported on this GPU
289 // VI: v_add_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
290290 v_add_u32 v1, vcc, v2, v3
291291
292 // SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
293 // VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
292 // NOSICI: error: instruction not supported on this GPU
293 // VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
294294 v_add_u32 v1, s[0:1], v2, v3
295295
296296 // SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
297 // VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
297 // NOVI: error: instruction not supported on this GPU
298298 v_sub_i32 v1, vcc, v2, v3
299299
300300 // SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
301 // VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
301 // NOVI: error: instruction not supported on this GPU
302302 v_sub_i32 v1, s[0:1], v2, v3
303303
304 // SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
305 // VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
304 // NOSICI: error: instruction not supported on this GPU
305 // VI: v_sub_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
306306 v_sub_u32 v1, vcc, v2, v3
307307
308 // SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
309 // VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
308 // NOSICI: error: instruction not supported on this GPU
309 // VI: v_sub_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
310310 v_sub_u32 v1, s[0:1], v2, v3
311311
312312 // SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
313 // VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
313 // NOVI: error: instruction not supported on this GPU
314314 v_subrev_i32 v1, vcc, v2, v3
315315
316316 // SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
317 // VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
317 // NOVI: error: instruction not supported on this GPU
318318 v_subrev_i32 v1, s[0:1], v2, v3
319319
320 // SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
321 // VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
320 // NOSICI: error: instruction not supported on this GPU
321 // VI: v_subrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
322322 v_subrev_u32 v1, vcc, v2, v3
323323
324 // SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
325 // VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
324 // NOSICI: error: instruction not supported on this GPU
325 // VI: v_subrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
326326 v_subrev_u32 v1, s[0:1], v2, v3
327327
328328 // SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
508508 v_ldexp_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
509509
510510 // NOSICI: error:
511 // VI9: v_add_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
512 v_add_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
513
514 // NOSICI: error:
515 // VI9: v_sub_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
516 v_sub_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
517
518 // NOSICI: error:
519 // VI9: v_subrev_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
520 v_subrev_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
521
522 // NOSICI: error:
523 // VI9: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
511 // NOGFX9: error:
512 // VI: v_add_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
513 v_add_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
514
515 // NOSICI: error:
516 // NOGFX9: error:
517 // VI: v_sub_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
518 v_sub_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
519
520 // NOSICI: error:
521 // NOGFX9: error:
522 // VI: v_subrev_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
523 v_subrev_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
524
525 // NOSICI: error:
526 // NOGFX9: error:
527 // VI: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
524528 v_addc_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
525529
526530 // NOSICI: error:
527 // VI9: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
531 // NOGFX9: error:
532 // VI: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
528533 v_subb_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
529534
530535 // NOSICI: error:
531 // VI9: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
536 // NOGFX9: error:
537 // VI: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
532538 v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
533539
540 // NOSICI: error:
541 // NOVI: error:
542 // GFX9: v_add_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
543 v_add_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
544
545 // NOSICI: error:
546 // NOVI: error:
547 // GFX9: v_sub_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
548 v_sub_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
549
550 // NOSICI: error:
551 // NOVI: error:
552 // GFX9: v_subrev_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
553 v_subrev_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
554
555 // NOSICI: error:
556 // NOVI: error:
557 // GFX9: v_addc_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
558 v_addc_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
559
560 // NOSICI: error:
561 // NOVI: error:
562 // GFX9: v_subb_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
563 v_subb_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
564
565 // NOSICI: error:
566 // NOVI: error:
567 // GFX9: v_subbrev_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
568 v_subbrev_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
569
534570 //===----------------------------------------------------------------------===//
535571 // Check that immediates and scalar regs are not supported
536572 //===----------------------------------------------------------------------===//
496496 v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
497497
498498 // NOSICI: error:
499 // GFX89: v_add_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
500 v_add_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
501
502 // NOSICI: error:
503 // GFX89: v_sub_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
504 v_sub_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
505
506 // NOSICI: error:
507 // GFX89: v_subrev_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
508 v_subrev_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
509
510 // NOSICI: error:
511 // GFX89: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
499 // NOGFX9: error:
500 // VI: v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
501 v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
502
503 // NOSICI: error:
504 // NOGFX9: error:
505 // VI: v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
506 v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
507
508 // NOSICI: error:
509 // NOGFX9: error:
510 // VI: v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
511 v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
512
513 // NOSICI: error:
514 // NOGFX9: error:
515 // VI: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
512516 v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
513517
514518 // NOSICI: error:
515 // GFX89: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
519 // NOGFX9: error:
520 // VI: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
516521 v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
517522
518523 // NOSICI: error:
519 // GFX89: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
524 // NOGFX9: error:
525 // VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
520526 v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
527
528 // NOSICI: error:
529 // NOVI: error:
530 // GFX9: v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
531 v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
532
533 // NOSICI: error:
534 // NOVI: error:
535 // GFX9: v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
536 v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
537
538 // NOSICI: error:
539 // NOVI: error:
540 // GFX9: v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
541 v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
542
543 // NOSICI: error:
544 // NOVI: error:
545 // GFX9: v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
546 v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
547
548 // NOSICI: error:
549 // NOVI: error:
550 // GFX9: v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
551 v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
552
553 // NOSICI: error:
554 // NOVI: error:
555 // GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
556 v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
521557
522558 //===----------------------------------------------------------------------===//
523559 // Check VOPC opcodes
9292 # VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
9393 0x01 0x00 0x8d 0xd2 0x02 0x07 0x02 0x00
9494
95 # VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
95 # VI: v_add_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
9696 0x02 0x07 0x02 0x32
9797
98 # VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
98 # VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
9999 0x01 0x00 0x19 0xd1 0x02 0x07 0x02 0x00
100100
101 # VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
101 # VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
102102 0x01 0x00 0x19 0xd1 0x02 0x07 0x02 0x00
103103
104 # VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
104 # VI: v_add_u32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
105105 0x01 0x6a 0x19 0xd1 0x02 0x07 0x02 0x00
106106
107 # VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
107 # VI: v_add_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
108108 0x02 0x07 0x02 0x32
109109
110 # VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
110 # VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
111111 0x01 0x00 0x19 0xd1 0x02 0x07 0x02 0x00
112112
113 # VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
113 # VI: v_sub_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
114114 0x02 0x07 0x02 0x34
115115
116 # VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
116 # VI: v_sub_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
117117 0x01 0x00 0x1a 0xd1 0x02 0x07 0x02 0x00
118118
119 # VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
119 # VI: v_sub_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
120120 0x02 0x07 0x02 0x34
121121
122 # VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
122 # VI: v_sub_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
123123 0x01 0x00 0x1a 0xd1 0x02 0x07 0x02 0x00
124124
125 # VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
125 # VI: v_subrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
126126 0x02 0x07 0x02 0x36
127127
128 # VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
128 # VI: v_subrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
129129 0x01 0x00 0x1b 0xd1 0x02 0x07 0x02 0x00
130130
131 # VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
131 # VI: v_subrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
132132 0x02 0x07 0x02 0x36
133133
134 # VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
134 # VI: v_subrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
135135 0x01 0x00 0x1b 0xd1 0x02 0x07 0x02 0x00
136136
137137 # VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
1111 s_waitcnt lgkmcnt(0)
1212 s_add_u32 s0, s7, s0
1313 BB0:
14 v_add_i32_e32 v1, vcc, s0, v1
14 v_add_u32_e32 v1, vcc, s0, v1
1515 BB1:
1616 s_movk_i32 s0, 0x483
1717 v_cmp_ge_i32_e32 vcc, s0, v0
3636 v_ashrrev_i32_e32 v77, 31, v76
3737 v_lshlrev_b64 v[10:11], 2, v[76:77]
3838 s_waitcnt lgkmcnt(0)
39 v_add_i32_e32 v10, vcc, s8, v10
39 v_add_u32_e32 v10, vcc, s8, v10
4040 v_mov_b32_e32 v6, s9
4141 v_addc_u32_e32 v11, vcc, v6, v11, vcc
4242 flat_load_dword v0, v[10:11]
5252 // CHECK: s_waitcnt lgkmcnt(0) // 000000000110: BF8C007F
5353 // CHECK: s_add_u32 s0, s7, s0 // 000000000114: 80000007
5454 // CHECK: BB0:
55 // CHECK: v_add_i32_e32 v1, vcc, s0, v1 // 000000000118: 32020200
55 // CHECK: v_add_u32_e32 v1, vcc, s0, v1 // 000000000118: 32020200
5656 // CHECK: BB1:
5757 // CHECK: s_movk_i32 s0, 0x483 // 00000000011C: B0000483
5858 // CHECK: v_cmp_ge_i32_e32 vcc, s0, v0 // 000000000120: 7D8C0000
7373 // CHECK: v_ashrrev_i32_e32 v77, 31, v76 // 000000000250: 229A989F
7474 // CHECK: v_lshlrev_b64 v[10:11], 2, v[76:77] // 000000000254: D28F000A 00029882
7575 // CHECK: s_waitcnt lgkmcnt(0) // 00000000025C: BF8C007F
76 // CHECK: v_add_i32_e32 v10, vcc, s8, v10 // 000000000260: 32141408
76 // CHECK: v_add_u32_e32 v10, vcc, s8, v10 // 000000000260: 32141408
7777 // CHECK: v_mov_b32_e32 v6, s9 // 000000000264: 7E0C0209
7878 // CHECK: v_addc_u32_e32 v11, vcc, v6, v11, vcc // 000000000268: 38161706
7979 // CHECK: flat_load_dword v0, v[10:11] // 00000000026C: DC500000 0000000A
1111 ; LINE: ; {{.*}}source-lines.cl:3
1212 ; LINE: v_mov_b32_e32 v{{[0-9]+}}, 0x888
1313 ; LINE: ; {{.*}}source-lines.cl:4
14 ; LINE: v_add_i32_e32
14 ; LINE: v_add_u32_e32
1515 ; LINE: ; {{.*}}source-lines.cl:5
1616 ; LINE: flat_store_dword
1717 ; Epilogue.
2727 ; SOURCE: ; int var1 = 0x888;
2828 ; SOURCE: v_mov_b32_e32 v{{[0-9]+}}, 0x888
2929 ; SOURCE: ; int var2 = var0 + var1;
30 ; SOURCE: v_add_i32_e32
30 ; SOURCE: v_add_u32_e32
3131 ; SOURCE: ; *Out = var2;
3232 ; SOURCE: flat_store_dword
3333 ; Epilogue.