llvm.org GIT mirror: llvm / 5d11fd5
[AMDGPU] Refactor VOP1 and VOP2 instruction TD definitions
Differential revision: https://reviews.llvm.org/D24738
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282234 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Valery Pykhtin
11 changed files with 1415 additions and 1727 deletions.
1111 // S_CBRANCH_CDBGUSER
1212 // S_CBRANCH_CDBGSYS
1313 // S_CBRANCH_CDBGSYS_OR_USER
14 // S_CBRANCH_CDBGSYS_AND_USER
15
16 //===----------------------------------------------------------------------===//
17 // VOP1 Instructions
18 //===----------------------------------------------------------------------===//
19
20 let SubtargetPredicate = isCIVI in {
21
22 let SchedRW = [WriteDoubleAdd] in {
23 defm V_TRUNC_F64 : VOP1Inst , "v_trunc_f64",
24 VOP_F64_F64, ftrunc
25 >;
26 defm V_CEIL_F64 : VOP1Inst , "v_ceil_f64",
27 VOP_F64_F64, fceil
28 >;
29 defm V_FLOOR_F64 : VOP1Inst , "v_floor_f64",
30 VOP_F64_F64, ffloor
31 >;
32 defm V_RNDNE_F64 : VOP1Inst , "v_rndne_f64",
33 VOP_F64_F64, frint
34 >;
35 } // End SchedRW = [WriteDoubleAdd]
36
37 let SchedRW = [WriteQuarterRate32] in {
38 defm V_LOG_LEGACY_F32 : VOP1Inst , "v_log_legacy_f32",
39 VOP_F32_F32
40 >;
41 defm V_EXP_LEGACY_F32 : VOP1Inst , "v_exp_legacy_f32",
42 VOP_F32_F32
43 >;
44 } // End SchedRW = [WriteQuarterRate32]
45
46 } // End SubtargetPredicate = isCIVI
14 // S_CBRANCH_CDBGSYS_AND_USER
143143 }
144144
145145 class VOPDstOperand : RegisterOperand ;
146
147 let Uses = [EXEC] in {
148
149 class VOPAnyCommon pattern> :
150 InstSI {
151
152 let mayLoad = 0;
153 let mayStore = 0;
154 let hasSideEffects = 0;
155 let UseNamedOperandTable = 1;
156 let VALU = 1;
157 }
158
159 class VOP1Common pattern> :
160 VOPAnyCommon {
161
162 let VOP1 = 1;
163 let Size = 4;
164 }
165
166 class VOP2Common pattern> :
167 VOPAnyCommon {
168
169 let VOP2 = 1;
170 let Size = 4;
171 }
172
173 class VOP3Common
174 list pattern = [], bit HasMods = 0,
175 bit VOP3Only = 0> :
176 VOPAnyCommon {
177
178 // Using complex patterns gives VOP3 patterns a very high complexity rating,
179 // but standalone patterns are almost always preferred, so we need to adjust the
180 // priority lower. The goal is to use a high number to reduce complexity to
181 // zero (or less than zero).
182 let AddedComplexity = -1000;
183
184 let VOP3 = 1;
185 let VALU = 1;
186
187 let AsmMatchConverter =
188 !if(!eq(VOP3Only,1),
189 "cvtVOP3",
190 !if(!eq(HasMods,1), "cvtVOP3_2_mod", ""));
191
192 let AsmVariantName = AMDGPUAsmVariants.VOP3;
193
194 let isCodeGenOnly = 0;
195
196 int Size = 8;
197
198 // Because SGPRs may be allowed if there are multiple operands, we
199 // need a post-isel hook to insert copies in order to avoid
200 // violating constant bus requirements.
201 let hasPostISelHook = 1;
202 }
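// Illustrative note (not from the original source): VALU instructions may read
// at most one SGPR or literal through the constant bus, so a hypothetical
// "v_add_f32 v0, s0, s1" cannot be encoded directly; the post-isel hook above
// arranges for one scalar operand to be copied into a VGPR (v_mov_b32) first.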
203
204 } // End Uses = [EXEC]
205
206 //===----------------------------------------------------------------------===//
207 // Vector ALU operations
208 //===----------------------------------------------------------------------===//
209
210 class VOP1e op> : Enc32 {
211 bits<8> vdst;
212 bits<9> src0;
213
214 let Inst{8-0} = src0;
215 let Inst{16-9} = op;
216 let Inst{24-17} = vdst;
217 let Inst{31-25} = 0x3f; //encoding
218 }
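// Summary of the resulting 32-bit VOP1 word, per the assignments above:
//   [31:25] = 0x3f (VOP1 prefix), [24:17] = vdst, [16:9] = op, [8:0] = src0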
219
220 class VOP2e op> : Enc32 {
221 bits<8> vdst;
222 bits<9> src0;
223 bits<8> src1;
224
225 let Inst{8-0} = src0;
226 let Inst{16-9} = src1;
227 let Inst{24-17} = vdst;
228 let Inst{30-25} = op;
229 let Inst{31} = 0x0; //encoding
230 }
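// Summary of the resulting 32-bit VOP2 word, per the assignments above:
//   [31] = 0, [30:25] = op, [24:17] = vdst, [16:9] = src1, [8:0] = src0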
231
232 class VOP2_MADKe op> : Enc64 {
233
234 bits<8> vdst;
235 bits<9> src0;
236 bits<8> src1;
237 bits<32> imm;
238
239 let Inst{8-0} = src0;
240 let Inst{16-9} = src1;
241 let Inst{24-17} = vdst;
242 let Inst{30-25} = op;
243 let Inst{31} = 0x0; // encoding
244 let Inst{63-32} = imm;
245 }
246
247 class VOP3a op> : Enc64 {
248 bits<2> src0_modifiers;
249 bits<9> src0;
250 bits<2> src1_modifiers;
251 bits<9> src1;
252 bits<2> src2_modifiers;
253 bits<9> src2;
254 bits<1> clamp;
255 bits<2> omod;
256
257 let Inst{8} = src0_modifiers{1};
258 let Inst{9} = src1_modifiers{1};
259 let Inst{10} = src2_modifiers{1};
260 let Inst{11} = clamp;
261 let Inst{25-17} = op;
262 let Inst{31-26} = 0x34; //encoding
263 let Inst{40-32} = src0;
264 let Inst{49-41} = src1;
265 let Inst{58-50} = src2;
266 let Inst{60-59} = omod;
267 let Inst{61} = src0_modifiers{0};
268 let Inst{62} = src1_modifiers{0};
269 let Inst{63} = src2_modifiers{0};
270 }
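// Summary of the resulting 64-bit VOP3a word (SI/CI layout), per the
// assignments above:
//   low dword:  [31:26] = 0x34, [25:17] = op, [11] = clamp,
//               [10:8] = abs bits (modifiers{1}) of src2/src1/src0
//   high dword: [63:61] = neg bits (modifiers{0}) of src2/src1/src0,
//               [60:59] = omod, [58:50] = src2, [49:41] = src1, [40:32] = src0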
271
272 class VOP3e op> : VOP3a {
273 bits<8> vdst;
274
275 let Inst{7-0} = vdst;
276 }
277
278 class VOP3be op> : Enc64 {
279 bits<8> vdst;
280 bits<2> src0_modifiers;
281 bits<9> src0;
282 bits<2> src1_modifiers;
283 bits<9> src1;
284 bits<2> src2_modifiers;
285 bits<9> src2;
286 bits<7> sdst;
287 bits<2> omod;
288
289 let Inst{7-0} = vdst;
290 let Inst{14-8} = sdst;
291 let Inst{25-17} = op;
292 let Inst{31-26} = 0x34; //encoding
293 let Inst{40-32} = src0;
294 let Inst{49-41} = src1;
295 let Inst{58-50} = src2;
296 let Inst{60-59} = omod;
297 let Inst{61} = src0_modifiers{0};
298 let Inst{62} = src1_modifiers{0};
299 let Inst{63} = src2_modifiers{0};
300 }
301146
302147 class VINTRPe op> : Enc32 {
303148 bits<8> vdst;
368213
369214 let Uses = [EXEC] in {
370215
371 class VOP1 op, dag outs, dag ins, string asm, list pattern> :
372 VOP1Common ,
373 VOP1e {
374 let isCodeGenOnly = 0;
375 }
376
377 class VOP2 op, dag outs, dag ins, string asm, list pattern> :
378 VOP2Common , VOP2e {
379 let isCodeGenOnly = 0;
380 }
381
382216 class VINTRPCommon pattern> :
383217 InstSI {
384218 let mayLoad = 1;
1212 AssemblerPredicate <"FeatureSeaIslands">;
1313
1414 def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
15
16 class vop {
17 field bits<9> SI3;
18 field bits<10> VI3;
19 }
20
21 class vop1 si, bits<8> vi = si> : vop {
22 field bits<8> SI = si;
23 field bits<8> VI = vi;
24
25 field bits<9> SI3 = {1, 1, si{6-0}};
26 field bits<10> VI3 = !add(0x140, vi);
27 }
28
29 class vop2 si, bits<6> vi = si> : vop {
30 field bits<6> SI = si;
31 field bits<6> VI = vi;
32
33 field bits<9> SI3 = {1, 0, 0, si{5-0}};
34 field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
35 }
36
37 // Specify a VOP2 opcode for SI and a VOP3 opcode for VI, for an instruction
38 // that has no VOP2 encoding on VI.
39 class vop23 si, bits<10> vi> : vop2 {
40 let VI3 = vi;
41 }
42
43 class vop3 si, bits<10> vi = {0, si}> : vop {
44 let SI3 = si;
45 let VI3 = vi;
46 }
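// Worked example (opcode value hypothetical, not from this change): for
// vop1<0x17>, SI = VI = 0x17, SI3 = {1, 1, 0x17{6-0}} = 0x197 and
// VI3 = !add(0x140, 0x17) = 0x157. In other words, the VOP3 opcode range for
// VOP1 instructions starts at 0x180 on SI/CI and at 0x140 on VI.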
4715
4816 // Except for the NONE field, this must be kept in sync with the
4917 // SIEncodingFamily enum in AMDGPUInstrInfo.cpp
638606
639607 // Returns 1 if the source arguments have modifiers, 0 if they do not.
640608 // XXX - do f16 instructions?
641 class hasModifiers {
609 class isFloatType {
642610 bit ret =
611 !if(!eq(SrcVT.Value, f16.Value), 1,
643612 !if(!eq(SrcVT.Value, f32.Value), 1,
644613 !if(!eq(SrcVT.Value, f64.Value), 1,
645 0));
646 }
647
648 class hasIntModifiers {
614 0)));
615 }
616
617 class isIntType {
649618 bit ret =
619 !if(!eq(SrcVT.Value, i16.Value), 1,
650620 !if(!eq(SrcVT.Value, i32.Value), 1,
651621 !if(!eq(SrcVT.Value, i64.Value), 1,
652 0));
622 0)));
653623 }
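// Example evaluations of the two helpers above:
//   isFloatType<f32>.ret = 1, isFloatType<i32>.ret = 0,
//   isIntType<i64>.ret = 1,   isIntType<untyped>.ret = 0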
654624
655625
755725 // VOP1 without input operands (V_NOP)
756726 (ins),
757727 !if(!eq(NumSrcArgs, 1),
758 !if(HasFloatModifiers,
759 // VOP1_SDWA with float modifiers
760 (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
761 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
762 src0_sel:$src0_sel),
763 // VOP1_SDWA with int modifiers
764 (ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
765 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
766 src0_sel:$src0_sel))
767 /* NumSrcArgs == 2 */,
768 !if(HasFloatModifiers,
769 !if(!eq(DstVT.Size, 1),
770 // VOPC_SDWA with float modifiers
771 (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
772 Src1Mod:$src1_fmodifiers, Src1RC:$src1,
773 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
774 // VOP2_SDWA or VOPC_SDWA with float modifiers
775 (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
776 Src1Mod:$src1_fmodifiers, Src1RC:$src1,
777 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
778 src0_sel:$src0_sel, src1_sel:$src1_sel)),
779
780 !if(!eq(DstVT.Size, 1),
781 // VOPC_SDWA with int modifiers
782 (ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
783 Src1Mod:$src1_imodifiers, Src1RC:$src1,
784 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
785 // VOP2_SDWA or VOPC_SDWA with int modifiers
786 (ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
787 Src1Mod:$src1_imodifiers, Src1RC:$src1,
788 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
789 src0_sel:$src0_sel, src1_sel:$src1_sel))
790 /* endif */)));
728 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
729 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
730 src0_sel:$src0_sel),
731 !if(!eq(NumSrcArgs, 2),
732 !if(!eq(DstVT.Size, 1),
733 // VOPC_SDWA with float modifiers
734 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
735 Src1Mod:$src1_modifiers, Src1RC:$src1,
736 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
737 // VOP2_SDWA or VOPC_SDWA with float modifiers
738 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
739 Src1Mod:$src1_modifiers, Src1RC:$src1,
740 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
741 src0_sel:$src0_sel, src1_sel:$src1_sel)),
742 (ins)/* endif */)));
791743 }
792744
793745 // Outs for DPP and SDWA
851803 " vcc", // use vcc token as dst for VOPC instructioins
852804 "$vdst"),
853805 "");
854 string src0 = !if(HasFloatModifiers, "$src0_fmodifiers", "$src0_imodifiers");
855 string src1 = !if(HasFloatModifiers, "$src1_fmodifiers", "$src1_imodifiers");
806 string src0 = "$src0_modifiers";
807 string src1 = "$src1_modifiers";
856808 string args = !if(!eq(NumSrcArgs, 0),
857809 "",
858810 !if(!eq(NumSrcArgs, 1),
889841 )
890842 )
891843 );
844 }
845
846 class BitOr {
847 bit ret = !if(a, 1, !if(b, 1, 0));
848 }
849
850 class BitAnd {
851 bit ret = !if(a, !if(b, 1, 0), 0);
892852 }
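// BitOr and BitAnd are plain boolean helpers over single bits, e.g.
// BitOr<0, 1>.ret = 1 and BitAnd<1, 0>.ret = 0; they are used below to
// combine the per-source modifier flags.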
893853
894854 class VOPProfile _ArgVT> {
917877
918878 field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
919879 field bit HasDst32 = HasDst;
880 field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
920881 field int NumSrcArgs = getNumSrcArgs.ret;
921882 field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
922883 field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
923884 field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
924 field bit HasSrc0Mods = hasModifiers.ret;
925 field bit HasSrc1Mods = hasModifiers.ret;
926 field bit HasSrc2Mods = hasModifiers.ret;
927
928 field bit HasSrc0IntMods = hasIntModifiers.ret;
929 field bit HasSrc1IntMods = hasIntModifiers.ret;
930 field bit HasSrc2IntMods = hasIntModifiers.ret;
931
932 field bit HasModifiers = HasSrc0Mods;
885
886 // TODO: Modifiers logic is somewhat ad hoc here, to be refined later
887 field bit HasModifiers = isFloatType.ret;
888
889 field bit HasSrc0FloatMods = isFloatType.ret;
890 field bit HasSrc1FloatMods = isFloatType.ret;
891 field bit HasSrc2FloatMods = isFloatType.ret;
892
893 field bit HasSrc0IntMods = isIntType.ret;
894 field bit HasSrc1IntMods = isIntType.ret;
895 field bit HasSrc2IntMods = isIntType.ret;
896
897 field bit HasSrc0Mods = HasModifiers;
898 field bit HasSrc1Mods = !if(HasModifiers, BitOr.ret, 0);
899 field bit HasSrc2Mods = !if(HasModifiers, BitOr.ret, 0);
900
933901 field bit HasOMod = HasModifiers;
934902 field bit HasClamp = HasModifiers;
935903 field bit HasSDWAClamp = HasSrc0;
996964 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
997965 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
998966
999 // Restrict src0 to be VGPR
1000 def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
1001 let Src0RC32 = VRegSrc_32;
1002 let Src0RC64 = VRegSrc_32;
1003
1004 let HasExt = 0;
1005 }
1006
1007 // Special case because there are no true output operands. Hack vdst
1008 // to be a src operand. The custom inserter must add a tied implicit
1009 // def and use of the super register since there seems to be no way to
1010 // add an implicit def of a virtual register in tablegen.
1011 def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
1012 let Src0RC32 = VOPDstOperand;
1013 let Src0RC64 = VOPDstOperand;
1014
1015 let Outs = (outs);
1016 let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
1017 let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
1018
1019 let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1020 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
1021 let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_imodifiers, VCSrc_b32:$src0,
1022 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
1023 src0_sel:$src0_sel);
1024
1025 let Asm32 = getAsm32<1, 1>.ret;
1026 let Asm64 = getAsm64<1, 1, 0>.ret;
1027 let AsmDPP = getAsmDPP<1, 1, 0>.ret;
1028 let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
1029
1030 let HasExt = 0;
1031 let HasDst = 0;
1032 }
1033
1034 // Write out to vcc or arbitrary SGPR.
1035 def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
1036 let Asm32 = "$vdst, vcc, $src0, $src1";
1037 let Asm64 = "$vdst, $sdst, $src0, $src1";
1038 let Outs32 = (outs DstRC:$vdst);
1039 let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
1040 }
1041
1042 // Write out to vcc or arbitrary SGPR and read in from vcc or
1043 // arbitrary SGPR.
1044 def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
1045 // We use VCSrc_b32 to exclude literal constants, even though the
1046 // encoding normally allows them since the implicit VCC use means
1047 // using one would always violate the constant bus
1048 // restriction. SGPRs are still allowed because it should
1049 // technically be possible to use VCC again as src0.
1050 let Src0RC32 = VCSrc_b32;
1051 let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
1052 let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
1053 let Outs32 = (outs DstRC:$vdst);
1054 let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
1055
1056 // Suppress src2 implied by type since the 32-bit encoding uses an
1057 // implicit VCC use.
1058 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
1059 }
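// Illustrative assembly for this profile (registers hypothetical; operand
// order per the Asm strings above): the 32-bit form of v_addc_u32 is written
// "v_addc_u32 v0, vcc, v1, v2, vcc" with VCC as the implicit carry in/out,
// while the 64-bit form names both boolean operands explicitly, e.g.
// "v_addc_u32 v0, s[0:1], v1, v2, s[2:3]".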
1060
1061 // Read in from vcc or arbitrary SGPR
1062 def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
1063 let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
1064 let Asm32 = "$vdst, $src0, $src1, vcc";
1065 let Asm64 = "$vdst, $src0, $src1, $src2";
1066 let Outs32 = (outs DstRC:$vdst);
1067 let Outs64 = (outs DstRC:$vdst);
1068
1069 // Suppress src2 implied by type since the 32-bit encoding uses an
1070 // implicit VCC use.
1071 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
1072 }
1073
1074967 def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
1075968 def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
1076969 def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
1077970
1078971 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
1079 def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
1080 field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm);
1081 field string Asm32 = "$vdst, $src0, $src1, $imm";
1082 field bit HasExt = 0;
1083 }
1084 def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
1085 field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1);
1086 field string Asm32 = "$vdst, $src0, $imm, $src1";
1087 field bit HasExt = 0;
1088 }
1089 def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
1090 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
1091 let Ins64 = getIns64, 3,
1092 HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
1093 let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
1094 FP32InputMods:$src1_modifiers, Src1RC32:$src1,
1095 VGPR_32:$src2, // stub argument
1096 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1097 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
1098 let InsSDWA = (ins FP32InputMods:$src0_fmodifiers, Src0RC32:$src0,
1099 FP32InputMods:$src1_fmodifiers, Src1RC32:$src1,
1100 VGPR_32:$src2, // stub argument
1101 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
1102 src0_sel:$src0_sel, src1_sel:$src1_sel);
1103 let Asm32 = getAsm32<1, 2, f32>.ret;
1104 let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
1105 let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
1106 let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
1107 }
1108972 def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
1109973 def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
1110974 def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
1112976 def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
1113977 def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
1114978
1115 class VOP {
1116 string OpName = opName;
1117 }
1118
1119979 class Commutable_REV {
1120980 string RevOp = revOp;
1121981 bit IsOrig = isOrig;
1125985 string NoRetOp = noRetOp;
1126986 bit IsRet = isRet;
1127987 }
1128
1129 class VOP1_Pseudo pattern, string opName> :
1130 VOP1Common ,
1131 VOP ,
1132 SIMCInstr ,
1133 MnemonicAlias {
1134 let isPseudo = 1;
1135 let isCodeGenOnly = 1;
1136
1137 field bits<8> vdst;
1138 field bits<9> src0;
1139 }
1140
1141 class VOP1_Real_si :
1142 VOP1,
1143 SIMCInstr {
1144 let AssemblerPredicate = SIAssemblerPredicate;
1145 let DecoderNamespace = "SICI";
1146 let DisableDecoder = DisableSIDecoder;
1147 }
1148
1149 class VOP1_Real_vi :
1150 VOP1,
1151 SIMCInstr {
1152 let AssemblerPredicates = [isVI];
1153 let DecoderNamespace = "VI";
1154 let DisableDecoder = DisableVIDecoder;
1155 }
1156
1157 multiclass VOP1_m pattern,
1158 string asm = opName#p.Asm32> {
1159 def "" : VOP1_Pseudo ;
1160
1161 def _si : VOP1_Real_si ;
1162
1163 def _vi : VOP1_Real_vi ;
1164
1165 }
1166
1167 class VOP1_DPP :
1168 VOP1_DPPe ,
1169 VOP_DPP {
1170 let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
1171 let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP,
1172 AMDGPUAsmVariants.Disable);
1173 let DecoderNamespace = "DPP";
1174 let DisableDecoder = DisableVIDecoder;
1175 let src0_modifiers = !if(p.HasModifiers, ?, 0);
1176 let src1_modifiers = 0;
1177 }
1178
1179 class SDWADisableFields {
1180 bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?);
1181 bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?);
1182 bits<2> src0_fmodifiers = !if(!eq(p.NumSrcArgs, 0),
1183 0,
1184 !if(p.HasModifiers, ?, 0));
1185 bits<1> src0_imodifiers = !if(!eq(p.NumSrcArgs, 0),
1186 0,
1187 !if(p.HasModifiers, 0, ?));
1188 bits<3> src1_sel = !if(!eq(p.NumSrcArgs, 0), 6,
1189 !if(!eq(p.NumSrcArgs, 1), 6,
1190 ?));
1191 bits<2> src1_fmodifiers = !if(!eq(p.NumSrcArgs, 0), 0,
1192 !if(!eq(p.NumSrcArgs, 1), 0,
1193 !if(p.HasModifiers, ?, 0)));
1194 bits<1> src1_imodifiers = !if(!eq(p.NumSrcArgs, 0), 0,
1195 !if(!eq(p.NumSrcArgs, 1), 0,
1196 !if(p.HasModifiers, 0, ?)));
1197 bits<3> dst_sel = !if(p.HasDst, ?, 6);
1198 bits<2> dst_unused = !if(p.HasDst, ?, 2);
1199 bits<1> clamp = !if(!eq(p.NumSrcArgs, 0), 0, ?);
1200 }
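// Note (assumption about intent): fields pinned to constants here correspond
// to operands the instruction does not actually have, so their encoding bits
// are forced to fixed defaults instead of being left as '?' to be filled from
// named operands.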
1201
1202 class VOP1_SDWA :
1203 VOP1_SDWAe ,
1204 VOP_SDWA ,
1205 SDWADisableFields {
1206 let AsmMatchConverter = "cvtSdwaVOP1";
1207 let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
1208 let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA,
1209 AMDGPUAsmVariants.Disable);
1210 let DecoderNamespace = "SDWA";
1211 let DisableDecoder = DisableVIDecoder;
1212 }
1213
1214 multiclass VOP1SI_m pattern,
1215 string asm = opName#p.Asm32> {
1216
1217 def "" : VOP1_Pseudo ;
1218
1219 def _si : VOP1_Real_si ;
1220 }
1221
1222 class VOP2_Pseudo pattern, string opName> :
1223 VOP2Common ,
1224 VOP ,
1225 SIMCInstr,
1226 MnemonicAlias {
1227 let isPseudo = 1;
1228 let isCodeGenOnly = 1;
1229 }
1230
1231 class VOP2_Real_si :
1232 VOP2 ,
1233 SIMCInstr {
1234 let AssemblerPredicates = [isSICI];
1235 let DecoderNamespace = "SICI";
1236 let DisableDecoder = DisableSIDecoder;
1237 }
1238
1239 class VOP2_Real_vi :
1240 VOP2 ,
1241 SIMCInstr {
1242 let AssemblerPredicates = [isVI];
1243 let DecoderNamespace = "VI";
1244 let DisableDecoder = DisableVIDecoder;
1245 }
1246
1247 multiclass VOP2SI_m pattern,
1248 string revOp> {
1249
1250 def "" : VOP2_Pseudo ,
1251 Commutable_REV;
1252
1253 def _si : VOP2_Real_si ;
1254 }
1255
1256 multiclass VOP2_m pattern,
1257 string revOp> {
1258
1259 def "" : VOP2_Pseudo ,
1260 Commutable_REV;
1261
1262 def _si : VOP2_Real_si ;
1263
1264 def _vi : VOP2_Real_vi ;
1265
1266 }
1267
1268 class VOP2_DPP :
1269 VOP2_DPPe ,
1270 VOP_DPP {
1271 let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
1272 let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP,
1273 AMDGPUAsmVariants.Disable);
1274 let DecoderNamespace = "DPP";
1275 let DisableDecoder = DisableVIDecoder;
1276 let src0_modifiers = !if(p.HasModifiers, ?, 0);
1277 let src1_modifiers = !if(p.HasModifiers, ?, 0);
1278 }
1279
1280 class VOP2_SDWA :
1281 VOP2_SDWAe ,
1282 VOP_SDWA ,
1283 SDWADisableFields {
1284 let AsmMatchConverter = "cvtSdwaVOP2";
1285 let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
1286 let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA,
1287 AMDGPUAsmVariants.Disable);
1288 let DecoderNamespace = "SDWA";
1289 let DisableDecoder = DisableVIDecoder;
1290 }
1291
1292 class VOP3DisableFields {
1293
1294 bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
1295 bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
1296 bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0);
1297 bits<2> omod = !if(HasModifiers, ?, 0);
1298 bits<1> clamp = !if(HasModifiers, ?, 0);
1299 bits<9> src1 = !if(HasSrc1, ?, 0);
1300 bits<9> src2 = !if(HasSrc2, ?, 0);
1301 }
1302
1303 class VOP3DisableModFields
1304 bit HasSrc1Mods = 0,
1305 bit HasSrc2Mods = 0,
1306 bit HasOutputMods = 0> {
1307 bits<2> src0_modifiers = !if(HasSrc0Mods, ?, 0);
1308 bits<2> src1_modifiers = !if(HasSrc1Mods, ?, 0);
1309 bits<2> src2_modifiers = !if(HasSrc2Mods, ?, 0);
1310 bits<2> omod = !if(HasOutputMods, ?, 0);
1311 bits<1> clamp = !if(HasOutputMods, ?, 0);
1312 }
1313
1314 class VOP3_Pseudo pattern, string opName,
1315 bit HasMods = 0, bit VOP3Only = 0> :
1316 VOP3Common ,
1317 VOP ,
1318 SIMCInstr,
1319 MnemonicAlias {
1320 let isPseudo = 1;
1321 let isCodeGenOnly = 1;
1322
1323 field bit vdst;
1324 field bit src0;
1325 }
1326
1327 class VOP3_Real_si op, dag outs, dag ins, string asm, string opName,
1328 bit HasMods = 0, bit VOP3Only = 0> :
1329 VOP3Common ,
1330 VOP3e ,
1331 SIMCInstr {
1332 let AssemblerPredicates = [isSICI];
1333 let DecoderNamespace = "SICI";
1334 let DisableDecoder = DisableSIDecoder;
1335 }
1336
1337 class VOP3_Real_vi op, dag outs, dag ins, string asm, string opName,
1338 bit HasMods = 0, bit VOP3Only = 0> :
1339 VOP3Common ,
1340 VOP3e_vi ,
1341 SIMCInstr {
1342 let AssemblerPredicates = [isVI];
1343 let DecoderNamespace = "VI";
1344 let DisableDecoder = DisableVIDecoder;
1345 }
1346
1347 class VOP3b_Real_si op, dag outs, dag ins, string asm, string opName,
1348 bit HasMods = 0, bit VOP3Only = 0> :
1349 VOP3Common ,
1350 VOP3be ,
1351 SIMCInstr {
1352 let AssemblerPredicates = [isSICI];
1353 let DecoderNamespace = "SICI";
1354 let DisableDecoder = DisableSIDecoder;
1355 }
1356
1357 class VOP3b_Real_vi op, dag outs, dag ins, string asm, string opName,
1358 bit HasMods = 0, bit VOP3Only = 0> :
1359 VOP3Common ,
1360 VOP3be_vi ,
1361 SIMCInstr {
1362 let AssemblerPredicates = [isVI];
1363 let DecoderNamespace = "VI";
1364 let DisableDecoder = DisableVIDecoder;
1365 }
1366
1367 class VOP3e_Real_si op, dag outs, dag ins, string asm, string opName,
1368 bit HasMods = 0, bit VOP3Only = 0> :
1369 VOP3Common ,
1370 VOP3e ,
1371 SIMCInstr {
1372 let AssemblerPredicates = [isSICI];
1373 let DecoderNamespace = "SICI";
1374 let DisableDecoder = DisableSIDecoder;
1375 }
1376
1377 class VOP3e_Real_vi op, dag outs, dag ins, string asm, string opName,
1378 bit HasMods = 0, bit VOP3Only = 0> :
1379 VOP3Common ,
1380 VOP3e_vi ,
1381 SIMCInstr {
1382 let AssemblerPredicates = [isVI];
1383 let DecoderNamespace = "VI";
1384 let DisableDecoder = DisableVIDecoder;
1385 }
1386
1387 multiclass VOP3_1_m
1388 list pattern, string opName, bit HasMods = 1> {
1389
1390 def "" : VOP3_Pseudo ;
1391
1392 def _si : VOP3_Real_si ,
1393 VOP3DisableFields<0, 0, HasMods>;
1394
1395 def _vi : VOP3_Real_vi ,
1396 VOP3DisableFields<0, 0, HasMods>;
1397 }
1398
1399 multiclass VOP3SI_1_m
1400 list pattern, string opName, bit HasMods = 1> {
1401
1402 def "" : VOP3_Pseudo ;
1403
1404 def _si : VOP3_Real_si ,
1405 VOP3DisableFields<0, 0, HasMods>;
1406 // No VI instruction. This class is for SI only.
1407 }
1408
1409 multiclass VOP3_2_m
1410 list pattern, string opName, string revOp,
1411 bit HasMods = 1> {
1412
1413 def "" : VOP3_Pseudo ,
1414 Commutable_REV;
1415
1416 def _si : VOP3_Real_si ,
1417 VOP3DisableFields<1, 0, HasMods>;
1418
1419 def _vi : VOP3_Real_vi ,
1420 VOP3DisableFields<1, 0, HasMods>;
1421 }
1422
1423 multiclass VOP3SI_2_m
1424 list pattern, string opName, string revOp,
1425 bit HasMods = 1> {
1426
1427 def "" : VOP3_Pseudo ,
1428 Commutable_REV;
1429
1430 def _si : VOP3_Real_si ,
1431 VOP3DisableFields<1, 0, HasMods>;
1432
1433 // No VI instruction. This class is for SI only.
1434 }
1435
1436 // Two operand VOP3b instruction that may have a 3rd SGPR bool operand
1437 // instead of an implicit VCC as in the VOP2b format.
1438 multiclass VOP3b_2_3_m
1439 list pattern, string opName, string revOp,
1440 bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
1441 def "" : VOP3_Pseudo ;
1442
1443 def _si : VOP3b_Real_si ,
1444 VOP3DisableFields<1, useSrc2Input, HasMods>;
1445
1446 def _vi : VOP3b_Real_vi ,
1447 VOP3DisableFields<1, useSrc2Input, HasMods>;
1448 }
1449
1450 // Same as VOP3b_2_3_m but no 2nd destination (sdst), e.g. v_cndmask_b32.
1451 multiclass VOP3e_2_3_m
1452 list pattern, string opName, string revOp,
1453 bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
1454 def "" : VOP3_Pseudo ;
1455
1456 def _si : VOP3e_Real_si ,
1457 VOP3DisableFields<1, useSrc2Input, HasMods>;
1458
1459 def _vi : VOP3e_Real_vi ,
1460 VOP3DisableFields<1, useSrc2Input, HasMods>;
1461 }
1462
1463
1464 // An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
1465 multiclass VOP2SI_3VI_m
1466 string asm, list pattern = []> {
1467 let isPseudo = 1, isCodeGenOnly = 1 in {
1468 def "" : VOPAnyCommon ,
1469 SIMCInstr;
1470 }
1471
1472 def _si : VOP2 ,
1473 SIMCInstr {
1474 let AssemblerPredicates = [isSICI];
1475 let DecoderNamespace = "SICI";
1476 let DisableDecoder = DisableSIDecoder;
1477 }
1478
1479 def _vi : VOP3Common ,
1480 VOP3e_vi ,
1481 VOP3DisableFields <1, 0, 0>,
1482 SIMCInstr {
1483 let AssemblerPredicates = [isVI];
1484 let DecoderNamespace = "VI";
1485 let DisableDecoder = DisableVIDecoder;
1486 }
1487 }
1488
1489 multiclass VOP1_Helper pat32,
1490 list pat64> {
1491
1492 defm _e32 : VOP1_m ;
1493
1494 defm _e64 : VOP3_1_m
1495 p.HasModifiers>;
1496
1497 def _dpp : VOP1_DPP ;
1498
1499 def _sdwa : VOP1_SDWA ;
1500 }
1501
1502 multiclass VOP1Inst
1503 SDPatternOperator node = null_frag> : VOP1_Helper <
1504 op, opName, P, [],
1505 !if(P.HasModifiers,
1506 [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
1507 i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
1508 [(set P.DstVT:$vdst, (node P.Src0VT:$src0))])
1509 >;
1510
1511 multiclass VOP1InstSI
1512 SDPatternOperator node = null_frag> {
1513
1514 defm _e32 : VOP1SI_m ;
1515
1516 defm _e64 : VOP3SI_1_m
1517 !if(P.HasModifiers,
1518 [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
1519 i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
1520 [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]),
1521 opName, P.HasModifiers>;
1522 }
1523
1524 multiclass VOP2_Helper pat32,
1525 list pat64, string revOp> {
1526
1527 defm _e32 : VOP2_m ;
1528
1529 defm _e64 : VOP3_2_m
1530 revOp, p.HasModifiers>;
1531
1532 def _dpp : VOP2_DPP ;
1533
1534 def _sdwa : VOP2_SDWA ;
1535 }
1536
1537 multiclass VOP2Inst
1538 SDPatternOperator node = null_frag,
1539 string revOp = opName> : VOP2_Helper <
1540 op, opName, P, [],
1541 !if(P.HasModifiers,
1542 [(set P.DstVT:$vdst,
1543 (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
1544 i1:$clamp, i32:$omod)),
1545 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
1546 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
1547 revOp
1548 >;
1549
1550 multiclass VOP2InstSI
1551 SDPatternOperator node = null_frag,
1552 string revOp = opName> {
1553
1554 defm _e32 : VOP2SI_m ;
1555
1556 defm _e64 : VOP3SI_2_m
1557 !if(P.HasModifiers,
1558 [(set P.DstVT:$vdst,
1559 (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
1560 i1:$clamp, i32:$omod)),
1561 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
1562 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
1563 opName, revOp, P.HasModifiers>;
1564 }
1565
1566 multiclass VOP2e_Helper
1567 list pat32, list pat64,
1568 string revOp, bit useSGPRInput> {
1569
1570 let SchedRW = [Write32Bit] in {
1571 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
1572 defm _e32 : VOP2_m ;
1573 }
1574
1575 defm _e64 : VOP3e_2_3_m
1576 opName, revOp, p.HasModifiers, useSGPRInput>;
1577 }
1578 }
1579
1580 multiclass VOP2eInst
1581 SDPatternOperator node = null_frag,
1582 string revOp = opName> : VOP2e_Helper <
1583 op, opName, P, [],
1584 !if(P.HasModifiers,
1585 [(set P.DstVT:$vdst,
1586 (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
1587 i1:$clamp, i32:$omod)),
1588 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
1589 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
1590 revOp, !eq(P.NumSrcArgs, 3)
1591 >;
1592
1593 multiclass VOP2b_Helper
1594 list pat32, list pat64,
1595 string revOp, bit useSGPRInput> {
1596
1597 let SchedRW = [Write32Bit, WriteSALU] in {
1598 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
1599 defm _e32 : VOP2_m ;
1600 }
1601
1602 defm _e64 : VOP3b_2_3_m
1603 opName, revOp, p.HasModifiers, useSGPRInput>;
1604 }
1605 }
1606
1607 multiclass VOP2bInst
1608 SDPatternOperator node = null_frag,
1609 string revOp = opName> : VOP2b_Helper <
1610 op, opName, P, [],
1611 !if(P.HasModifiers,
1612 [(set P.DstVT:$vdst,
1613 (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
1614 i1:$clamp, i32:$omod)),
1615 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
1616 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
1617 revOp, !eq(P.NumSrcArgs, 3)
1618 >;
1619
1620 // A VOP2 instruction that is VOP3-only on VI.
1621 multiclass VOP2_VI3_Helper
1622 list pat32, list pat64, string revOp> {
1623
1624 defm _e32 : VOP2SI_m ;
1625
1626 defm _e64 : VOP3_2_m
1627 revOp, p.HasModifiers>;
1628 }
1629
1630 multiclass VOP2_VI3_Inst
1631 SDPatternOperator node = null_frag,
1632 string revOp = opName>
1633 : VOP2_VI3_Helper <
1634 op, opName, P, [],
1635 !if(P.HasModifiers,
1636 [(set P.DstVT:$vdst,
1637 (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
1638 i1:$clamp, i32:$omod)),
1639 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
1640 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
1641 revOp
1642 >;
1643
1644 multiclass VOP2MADK pattern = []> {
1645
1646 def "" : VOP2_Pseudo ;
1647
1648 let isCodeGenOnly = 0 in {
1649 def _si : VOP2Common
1650 !strconcat(opName, P.Asm32), []>,
1651 SIMCInstr ,
1652 VOP2_MADKe {
1653 let AssemblerPredicates = [isSICI];
1654 let DecoderNamespace = "SICI";
1655 let DisableDecoder = DisableSIDecoder;
1656 }
1657
1658 def _vi : VOP2Common
1659 !strconcat(opName, P.Asm32), []>,
1660 SIMCInstr ,
1661 VOP2_MADKe {
1662 let AssemblerPredicates = [isVI];
1663 let DecoderNamespace = "VI";
1664 let DisableDecoder = DisableVIDecoder;
1665 }
1666 } // End isCodeGenOnly = 0
1667 }
1668
1669 class Vop3ModPat : Pat<
1670 (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
1671 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
1672 (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))),
1673 (Inst i32:$src0_modifiers, P.Src0VT:$src0,
1674 i32:$src1_modifiers, P.Src1VT:$src1,
1675 i32:$src2_modifiers, P.Src2VT:$src2,
1676 i1:$clamp,
1677 i32:$omod)>;
1678988
1679989 //===----------------------------------------------------------------------===//
1680990 // Interpolation opcodes
3535 defm EXP : EXP_m;
3636
3737 //===----------------------------------------------------------------------===//
38 // VOP1 Instructions
39 //===----------------------------------------------------------------------===//
40
41 let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
42 defm V_NOP : VOP1Inst , "v_nop", VOP_NONE>;
43 }
44
45 let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
46 defm V_MOV_B32 : VOP1Inst , "v_mov_b32", VOP_I32_I32>;
47 } // End isMoveImm = 1
48
49 let Uses = [EXEC] in {
50
51 // FIXME: Specify SchedRW for READFIRSTLANE_B32
52
53 def V_READFIRSTLANE_B32 : VOP1 <
54 0x00000002,
55 (outs SReg_32:$vdst),
56 (ins VGPR_32:$src0),
57 "v_readfirstlane_b32 $vdst, $src0",
58 [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]
59 > {
60 let isConvergent = 1;
61 }
62
63 }
64
65 let SchedRW = [WriteQuarterRate32] in {
66
67 defm V_CVT_I32_F64 : VOP1Inst , "v_cvt_i32_f64",
68 VOP_I32_F64, fp_to_sint
69 >;
70 defm V_CVT_F64_I32 : VOP1Inst , "v_cvt_f64_i32",
71 VOP_F64_I32, sint_to_fp
72 >;
73 defm V_CVT_F32_I32 : VOP1Inst , "v_cvt_f32_i32",
74 VOP_F32_I32, sint_to_fp
75 >;
76 defm V_CVT_F32_U32 : VOP1Inst , "v_cvt_f32_u32",
77 VOP_F32_I32, uint_to_fp
78 >;
79 defm V_CVT_U32_F32 : VOP1Inst , "v_cvt_u32_f32",
80 VOP_I32_F32, fp_to_uint
81 >;
82 defm V_CVT_I32_F32 : VOP1Inst , "v_cvt_i32_f32",
83 VOP_I32_F32, fp_to_sint
84 >;
85 defm V_CVT_F16_F32 : VOP1Inst , "v_cvt_f16_f32",
86 VOP_I32_F32, fp_to_f16
87 >;
88 defm V_CVT_F32_F16 : VOP1Inst , "v_cvt_f32_f16",
89 VOP_F32_I32, f16_to_fp
90 >;
91 defm V_CVT_RPI_I32_F32 : VOP1Inst , "v_cvt_rpi_i32_f32",
92 VOP_I32_F32, cvt_rpi_i32_f32>;
93 defm V_CVT_FLR_I32_F32 : VOP1Inst , "v_cvt_flr_i32_f32",
94 VOP_I32_F32, cvt_flr_i32_f32>;
95 defm V_CVT_OFF_F32_I4 : VOP1Inst , "v_cvt_off_f32_i4", VOP_F32_I32>;
96 defm V_CVT_F32_F64 : VOP1Inst , "v_cvt_f32_f64",
97 VOP_F32_F64, fpround
98 >;
99 defm V_CVT_F64_F32 : VOP1Inst , "v_cvt_f64_f32",
100 VOP_F64_F32, fpextend
101 >;
102 defm V_CVT_F32_UBYTE0 : VOP1Inst , "v_cvt_f32_ubyte0",
103 VOP_F32_I32, AMDGPUcvt_f32_ubyte0
104 >;
105 defm V_CVT_F32_UBYTE1 : VOP1Inst , "v_cvt_f32_ubyte1",
106 VOP_F32_I32, AMDGPUcvt_f32_ubyte1
107 >;
108 defm V_CVT_F32_UBYTE2 : VOP1Inst , "v_cvt_f32_ubyte2",
109 VOP_F32_I32, AMDGPUcvt_f32_ubyte2
110 >;
111 defm V_CVT_F32_UBYTE3 : VOP1Inst , "v_cvt_f32_ubyte3",
112 VOP_F32_I32, AMDGPUcvt_f32_ubyte3
113 >;
114 defm V_CVT_U32_F64 : VOP1Inst , "v_cvt_u32_f64",
115 VOP_I32_F64, fp_to_uint
116 >;
117 defm V_CVT_F64_U32 : VOP1Inst , "v_cvt_f64_u32",
118 VOP_F64_I32, uint_to_fp
119 >;
120
121 } // End SchedRW = [WriteQuarterRate32]
122
123 defm V_FRACT_F32 : VOP1Inst , "v_fract_f32",
124 VOP_F32_F32, AMDGPUfract
125 >;
126 defm V_TRUNC_F32 : VOP1Inst , "v_trunc_f32",
127 VOP_F32_F32, ftrunc
128 >;
129 defm V_CEIL_F32 : VOP1Inst , "v_ceil_f32",
130 VOP_F32_F32, fceil
131 >;
132 defm V_RNDNE_F32 : VOP1Inst , "v_rndne_f32",
133 VOP_F32_F32, frint
134 >;
135 defm V_FLOOR_F32 : VOP1Inst , "v_floor_f32",
136 VOP_F32_F32, ffloor
137 >;
138 defm V_EXP_F32 : VOP1Inst , "v_exp_f32",
139 VOP_F32_F32, fexp2
140 >;
141
142 let SchedRW = [WriteQuarterRate32] in {
143
144 defm V_LOG_F32 : VOP1Inst , "v_log_f32",
145 VOP_F32_F32, flog2
146 >;
147 defm V_RCP_F32 : VOP1Inst , "v_rcp_f32",
148 VOP_F32_F32, AMDGPUrcp
149 >;
150 defm V_RCP_IFLAG_F32 : VOP1Inst , "v_rcp_iflag_f32",
151 VOP_F32_F32
152 >;
153 defm V_RSQ_F32 : VOP1Inst , "v_rsq_f32",
154 VOP_F32_F32, AMDGPUrsq
155 >;
156
157 } // End SchedRW = [WriteQuarterRate32]
158
159 let SchedRW = [WriteDouble] in {
160
161 defm V_RCP_F64 : VOP1Inst , "v_rcp_f64",
162 VOP_F64_F64, AMDGPUrcp
163 >;
164 defm V_RSQ_F64 : VOP1Inst , "v_rsq_f64",
165 VOP_F64_F64, AMDGPUrsq
166 >;
167
168 } // End SchedRW = [WriteDouble];
169
170 defm V_SQRT_F32 : VOP1Inst , "v_sqrt_f32",
171 VOP_F32_F32, fsqrt
172 >;
173
174 let SchedRW = [WriteDouble] in {
175
176 defm V_SQRT_F64 : VOP1Inst , "v_sqrt_f64",
177 VOP_F64_F64, fsqrt
178 >;
179
180 } // End SchedRW = [WriteDouble]
181
182 let SchedRW = [WriteQuarterRate32] in {
183
184 defm V_SIN_F32 : VOP1Inst , "v_sin_f32",
185 VOP_F32_F32, AMDGPUsin
186 >;
187 defm V_COS_F32 : VOP1Inst , "v_cos_f32",
188 VOP_F32_F32, AMDGPUcos
189 >;
190
191 } // End SchedRW = [WriteQuarterRate32]
192
193 defm V_NOT_B32 : VOP1Inst , "v_not_b32", VOP_I32_I32>;
194 defm V_BFREV_B32 : VOP1Inst , "v_bfrev_b32", VOP_I32_I32>;
195 defm V_FFBH_U32 : VOP1Inst , "v_ffbh_u32", VOP_I32_I32>;
196 defm V_FFBL_B32 : VOP1Inst , "v_ffbl_b32", VOP_I32_I32>;
197 defm V_FFBH_I32 : VOP1Inst , "v_ffbh_i32", VOP_I32_I32>;
198 defm V_FREXP_EXP_I32_F64 : VOP1Inst , "v_frexp_exp_i32_f64",
199 VOP_I32_F64, int_amdgcn_frexp_exp
200 >;
201
202 let SchedRW = [WriteDoubleAdd] in {
203 defm V_FREXP_MANT_F64 : VOP1Inst , "v_frexp_mant_f64",
204 VOP_F64_F64, int_amdgcn_frexp_mant
205 >;
206
207 defm V_FRACT_F64 : VOP1Inst , "v_fract_f64",
208 VOP_F64_F64, AMDGPUfract
209 >;
210 } // End SchedRW = [WriteDoubleAdd]
211
212
213 defm V_FREXP_EXP_I32_F32 : VOP1Inst , "v_frexp_exp_i32_f32",
214 VOP_I32_F32, int_amdgcn_frexp_exp
215 >;
216 defm V_FREXP_MANT_F32 : VOP1Inst , "v_frexp_mant_f32",
217 VOP_F32_F32, int_amdgcn_frexp_mant
218 >;
219 let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
220 defm V_CLREXCP : VOP1Inst , "v_clrexcp", VOP_NO_EXT>;
221 }
222
223 let Uses = [M0, EXEC] in {
224 // v_movreld_b32 is a special case because the destination output
225 // register is really a source. It isn't actually read (but may be
226 // written), and is only to provide the base register to start
227 // indexing from. Tablegen seems to not let you define an implicit
228 // virtual register output for the super register being written into,
229 // so this must have an implicit def of the register added to it.
230 defm V_MOVRELD_B32 : VOP1Inst , "v_movreld_b32", VOP_MOVRELD>;
231 defm V_MOVRELS_B32 : VOP1Inst , "v_movrels_b32", VOP_I32_VI32_NO_EXT>;
232 defm V_MOVRELSD_B32 : VOP1Inst , "v_movrelsd_b32", VOP_NO_EXT>;
233
234 } // End Uses = [M0, EXEC]
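// Illustrative example (values hypothetical): with M0 = 2, "v_movreld_b32 v0, v7"
// writes v2, i.e. VGPR[vdst + M0], which is why the VOP_MOVRELD profile carries
// $vdst in a source-operand position.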
235
236 // These instructions only exist on SI and CI
237 let SubtargetPredicate = isSICI in {
238
239 let SchedRW = [WriteQuarterRate32] in {
240
241 defm V_MOV_FED_B32 : VOP1InstSI , "v_mov_fed_b32", VOP_I32_I32>;
242 defm V_LOG_CLAMP_F32 : VOP1InstSI , "v_log_clamp_f32",
243 VOP_F32_F32, int_amdgcn_log_clamp>;
244 defm V_RCP_CLAMP_F32 : VOP1InstSI , "v_rcp_clamp_f32", VOP_F32_F32>;
245 defm V_RCP_LEGACY_F32 : VOP1InstSI , "v_rcp_legacy_f32",
246 VOP_F32_F32, AMDGPUrcp_legacy>;
247 defm V_RSQ_CLAMP_F32 : VOP1InstSI , "v_rsq_clamp_f32",
248 VOP_F32_F32, AMDGPUrsq_clamp
249 >;
250 defm V_RSQ_LEGACY_F32 : VOP1InstSI , "v_rsq_legacy_f32",
251 VOP_F32_F32, AMDGPUrsq_legacy
252 >;
253
254 } // End SchedRW = [WriteQuarterRate32]
255
256 let SchedRW = [WriteDouble] in {
257
258 defm V_RCP_CLAMP_F64 : VOP1InstSI , "v_rcp_clamp_f64", VOP_F64_F64>;
259 defm V_RSQ_CLAMP_F64 : VOP1InstSI , "v_rsq_clamp_f64",
260 VOP_F64_F64, AMDGPUrsq_clamp
261 >;
262
263 } // End SchedRW = [WriteDouble]
264
265 } // End SubtargetPredicate = isSICI
266
267 //===----------------------------------------------------------------------===//
26838 // VINTRP Instructions
26939 //===----------------------------------------------------------------------===//
27040
31484 (i32 imm:$attr)))]>;
31585
31686 } // End Uses = [M0, EXEC]
317
318 //===----------------------------------------------------------------------===//
319 // VOP2 Instructions
320 //===----------------------------------------------------------------------===//
321
322 defm V_CNDMASK_B32 : VOP2eInst , "v_cndmask_b32",
323 VOP2e_I32_I32_I32_I1
324 >;
325
326 let isCommutable = 1 in {
327 defm V_ADD_F32 : VOP2Inst , "v_add_f32",
328 VOP_F32_F32_F32, fadd
329 >;
330
331 defm V_SUB_F32 : VOP2Inst , "v_sub_f32", VOP_F32_F32_F32, fsub>;
332 defm V_SUBREV_F32 : VOP2Inst , "v_subrev_f32",
333 VOP_F32_F32_F32, null_frag, "v_sub_f32"
334 >;
335 } // End isCommutable = 1
336
337 let isCommutable = 1 in {
338
339 defm V_MUL_LEGACY_F32 : VOP2Inst , "v_mul_legacy_f32",
340 VOP_F32_F32_F32, AMDGPUfmul_legacy
341 >;
342
343 defm V_MUL_F32 : VOP2Inst , "v_mul_f32",
344 VOP_F32_F32_F32, fmul
345 >;
346
347 defm V_MUL_I32_I24 : VOP2Inst , "v_mul_i32_i24",
348 VOP_I32_I32_I32, AMDGPUmul_i24
349 >;
350
351 defm V_MUL_HI_I32_I24 : VOP2Inst , "v_mul_hi_i32_i24",
352 VOP_I32_I32_I32, AMDGPUmulhi_i24
353 >;
354
355 defm V_MUL_U32_U24 : VOP2Inst , "v_mul_u32_u24",
356 VOP_I32_I32_I32, AMDGPUmul_u24
357 >;
358
359 defm V_MUL_HI_U32_U24 : VOP2Inst , "v_mul_hi_u32_u24",
360 VOP_I32_I32_I32, AMDGPUmulhi_u24
361 >;
362
363 defm V_MIN_F32 : VOP2Inst , "v_min_f32", VOP_F32_F32_F32,
364 fminnum>;
365 defm V_MAX_F32 : VOP2Inst , "v_max_f32", VOP_F32_F32_F32,
366 fmaxnum>;
367 defm V_MIN_I32 : VOP2Inst , "v_min_i32", VOP_I32_I32_I32>;
368 defm V_MAX_I32 : VOP2Inst , "v_max_i32", VOP_I32_I32_I32>;
369 defm V_MIN_U32 : VOP2Inst , "v_min_u32", VOP_I32_I32_I32>;
370 defm V_MAX_U32 : VOP2Inst , "v_max_u32", VOP_I32_I32_I32>;
371
372 defm V_LSHRREV_B32 : VOP2Inst <
373 vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
374 "v_lshr_b32"
375 >;
376
377 defm V_ASHRREV_I32 : VOP2Inst <
378 vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
379 "v_ashr_i32"
380 >;
381
382 defm V_LSHLREV_B32 : VOP2Inst <
383 vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
384 "v_lshl_b32"
385 >;
386
387 defm V_AND_B32 : VOP2Inst , "v_and_b32", VOP_I32_I32_I32>;
388 defm V_OR_B32 : VOP2Inst , "v_or_b32", VOP_I32_I32_I32>;
389 defm V_XOR_B32 : VOP2Inst , "v_xor_b32", VOP_I32_I32_I32>;
390
391 let Constraints = "$vdst = $src2", DisableEncoding="$src2",
392 isConvertibleToThreeAddress = 1 in {
393 defm V_MAC_F32 : VOP2Inst , "v_mac_f32", VOP_MAC>;
394 }
395 } // End isCommutable = 1
396
397 defm V_MADMK_F32 : VOP2MADK , "v_madmk_f32", VOP_MADMK>;
398
399 let isCommutable = 1 in {
400 defm V_MADAK_F32 : VOP2MADK , "v_madak_f32", VOP_MADAK>;
401 } // End isCommutable = 1
402
403 let isCommutable = 1 in {
404 // No patterns so that the scalar instructions are always selected.
405 // The scalar versions will be replaced with vector when needed later.
406
407 // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
408 // but the VI instructions behave the same as the SI versions.
409 defm V_ADD_I32 : VOP2bInst , "v_add_i32",
410 VOP2b_I32_I1_I32_I32
411 >;
412 defm V_SUB_I32 : VOP2bInst , "v_sub_i32", VOP2b_I32_I1_I32_I32>;
413
414 defm V_SUBREV_I32 : VOP2bInst , "v_subrev_i32",
415 VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
416 >;
417
418 defm V_ADDC_U32 : VOP2bInst , "v_addc_u32",
419 VOP2b_I32_I1_I32_I32_I1
420 >;
421 defm V_SUBB_U32 : VOP2bInst , "v_subb_u32",
422 VOP2b_I32_I1_I32_I32_I1
423 >;
424 defm V_SUBBREV_U32 : VOP2bInst , "v_subbrev_u32",
425 VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
426 >;
427
428 } // End isCommutable = 1
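// Illustrative assembly for the renaming noted above (registers hypothetical):
// SI/CI assembly writes "v_add_i32 v0, vcc, v1, v2", while VI assembly uses
// "v_add_u32 v0, vcc, v1, v2" for the same carry-out operation.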
429
430 // These are special and do not read the exec mask.
431 let isConvergent = 1, Uses = [] in {
432
433 defm V_READLANE_B32 : VOP2SI_3VI_m <
434 vop3 <0x001, 0x289>,
435 "v_readlane_b32",
436 (outs SReg_32:$vdst),
437 (ins VGPR_32:$src0, SCSrc_b32:$src1),
438 "v_readlane_b32 $vdst, $src0, $src1",
439 [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]
440 >;
441
442 defm V_WRITELANE_B32 : VOP2SI_3VI_m <
443 vop3 <0x002, 0x28a>,
444 "v_writelane_b32",
445 (outs VGPR_32:$vdst),
446 (ins SReg_32:$src0, SCSrc_b32:$src1),
447 "v_writelane_b32 $vdst, $src0, $src1"
448 >;
449
450 } // End isConvergent = 1
451
452 // These instructions only exist on SI and CI
453 let SubtargetPredicate = isSICI in {
454
455 let isCommutable = 1 in {
456 defm V_MAC_LEGACY_F32 : VOP2InstSI , "v_mac_legacy_f32",
457 VOP_F32_F32_F32
458 >;
459 } // End isCommutable = 1
460
461 defm V_MIN_LEGACY_F32 : VOP2InstSI , "v_min_legacy_f32",
462 VOP_F32_F32_F32, AMDGPUfmin_legacy
463 >;
464 defm V_MAX_LEGACY_F32 : VOP2InstSI , "v_max_legacy_f32",
465 VOP_F32_F32_F32, AMDGPUfmax_legacy
466 >;
467
468 let isCommutable = 1 in {
469 defm V_LSHR_B32 : VOP2InstSI , "v_lshr_b32", VOP_I32_I32_I32>;
470 defm V_ASHR_I32 : VOP2InstSI , "v_ashr_i32", VOP_I32_I32_I32>;
471 defm V_LSHL_B32 : VOP2InstSI , "v_lshl_b32", VOP_I32_I32_I32>;
472 } // End isCommutable = 1
473 } // End let SubtargetPredicate = SICI
474
475 defm V_BFM_B32 : VOP2_VI3_Inst , "v_bfm_b32",
476 VOP_I32_I32_I32
477 >;
478 defm V_BCNT_U32_B32 : VOP2_VI3_Inst , "v_bcnt_u32_b32",
479 VOP_I32_I32_I32
480 >;
481 defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_lo_u32_b32",
482 VOP_I32_I32_I32, int_amdgcn_mbcnt_lo
483 >;
484 defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_hi_u32_b32",
485 VOP_I32_I32_I32, int_amdgcn_mbcnt_hi
486 >;
487 defm V_LDEXP_F32 : VOP2_VI3_Inst , "v_ldexp_f32",
488 VOP_F32_F32_I32, AMDGPUldexp
489 >;
490
491 defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst , "v_cvt_pkaccum_u8_f32",
492 VOP_I32_F32_I32>; // TODO: set "Uses = dst"
493
494 defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst , "v_cvt_pknorm_i16_f32",
495 VOP_I32_F32_F32
496 >;
497 defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst , "v_cvt_pknorm_u16_f32",
498 VOP_I32_F32_F32
499 >;
500 defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst , "v_cvt_pkrtz_f16_f32",
501 VOP_I32_F32_F32, int_SI_packf16
502 >;
503 defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst , "v_cvt_pk_u16_u32",
504 VOP_I32_I32_I32
505 >;
506 defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst , "v_cvt_pk_i16_i32",
507 VOP_I32_I32_I32
508 >;
50987
51088 //===----------------------------------------------------------------------===//
51189 // Pseudo Instructions
1010 //
1111 //===----------------------------------------------------------------------===//
1212
13 class VOP3a_vi op> : Enc64 {
14 bits<2> src0_modifiers;
15 bits<9> src0;
16 bits<2> src1_modifiers;
17 bits<9> src1;
18 bits<2> src2_modifiers;
19 bits<9> src2;
20 bits<1> clamp;
21 bits<2> omod;
22
23 let Inst{8} = src0_modifiers{1};
24 let Inst{9} = src1_modifiers{1};
25 let Inst{10} = src2_modifiers{1};
26 let Inst{15} = clamp;
27 let Inst{25-16} = op;
28 let Inst{31-26} = 0x34; //encoding
29 let Inst{40-32} = src0;
30 let Inst{49-41} = src1;
31 let Inst{58-50} = src2;
32 let Inst{60-59} = omod;
33 let Inst{61} = src0_modifiers{0};
34 let Inst{62} = src1_modifiers{0};
35 let Inst{63} = src2_modifiers{0};
36 }
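// Same overall layout as the SI/CI VOP3a encoding, except that clamp moves to
// bit 15 and the opcode field widens to ten bits at [25:16], per the
// assignments above.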
37
38 class VOP3e_vi op> : VOP3a_vi {
39 bits<8> vdst;
40
41 let Inst{7-0} = vdst;
42 }
43
44 class VOP3be_vi op> : Enc64 {
45 bits<8> vdst;
46 bits<2> src0_modifiers;
47 bits<9> src0;
48 bits<2> src1_modifiers;
49 bits<9> src1;
50 bits<2> src2_modifiers;
51 bits<9> src2;
52 bits<7> sdst;
53 bits<2> omod;
54 bits<1> clamp;
55
56 let Inst{7-0} = vdst;
57 let Inst{14-8} = sdst;
58 let Inst{15} = clamp;
59 let Inst{25-16} = op;
60 let Inst{31-26} = 0x34; //encoding
61 let Inst{40-32} = src0;
62 let Inst{49-41} = src1;
63 let Inst{58-50} = src2;
64 let Inst{60-59} = omod;
65 let Inst{61} = src0_modifiers{0};
66 let Inst{62} = src1_modifiers{0};
67 let Inst{63} = src2_modifiers{0};
68 }
69
70 class VOP_DPP pattern, bit HasMods = 0> :
71 VOPAnyCommon {
72 let DPP = 1;
73 let Size = 8;
74
75 let AsmMatchConverter = !if(!eq(HasMods,1), "cvtDPP", "");
76 let AsmVariantName = AMDGPUAsmVariants.DPP;
77 }
78
79 class VOP_DPPe : Enc64 {
80 bits<2> src0_modifiers;
81 bits<8> src0;
82 bits<2> src1_modifiers;
83 bits<9> dpp_ctrl;
84 bits<1> bound_ctrl;
85 bits<4> bank_mask;
86 bits<4> row_mask;
87
88 let Inst{39-32} = src0;
89 let Inst{48-40} = dpp_ctrl;
90 let Inst{51} = bound_ctrl;
91 let Inst{52} = src0_modifiers{0}; // src0_neg
92 let Inst{53} = src0_modifiers{1}; // src0_abs
93 let Inst{54} = src1_modifiers{0}; // src1_neg
94 let Inst{55} = src1_modifiers{1}; // src1_abs
95 let Inst{59-56} = bank_mask;
96 let Inst{63-60} = row_mask;
97 }
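// Summary of the extra DPP dword, per the assignments above:
//   [63:60] = row_mask, [59:56] = bank_mask, [55:52] = src1 abs/neg and
//   src0 abs/neg, [51] = bound_ctrl, [48:40] = dpp_ctrl, [39:32] = src0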
98
99 class VOP1_DPPe op> : VOP_DPPe {
100 bits<8> vdst;
101
102 let Inst{8-0} = 0xfa; // dpp
103 let Inst{16-9} = op;
104 let Inst{24-17} = vdst;
105 let Inst{31-25} = 0x3f; //encoding
106 }
107
108 class VOP2_DPPe op> : VOP_DPPe {
109 bits<8> vdst;
110 bits<8> src1;
111
112 let Inst{8-0} = 0xfa; //dpp
113 let Inst{16-9} = src1;
114 let Inst{24-17} = vdst;
115 let Inst{30-25} = op;
116 let Inst{31} = 0x0; //encoding
117 }
118
119 class VOP_SDWA pattern, bit HasMods = 0> :
120 VOPAnyCommon {
121 let SDWA = 1;
122 let Size = 8;
123 let AsmVariantName = AMDGPUAsmVariants.SDWA;
124 }
125
126 class VOP_SDWAe : Enc64 {
127 bits<8> src0;
128 bits<3> src0_sel;
129 bits<2> src0_fmodifiers; // {abs,neg}
130 bits<1> src0_imodifiers; // sext
131 bits<3> src1_sel;
132 bits<2> src1_fmodifiers;
133 bits<1> src1_imodifiers;
134 bits<3> dst_sel;
135 bits<2> dst_unused;
136 bits<1> clamp;
137
138 let Inst{39-32} = src0;
139 let Inst{42-40} = dst_sel;
140 let Inst{44-43} = dst_unused;
141 let Inst{45} = clamp;
142 let Inst{50-48} = src0_sel;
143 let Inst{53-52} = src0_fmodifiers;
144 let Inst{51} = src0_imodifiers;
145 let Inst{58-56} = src1_sel;
146 let Inst{61-60} = src1_fmodifiers;
147 let Inst{59} = src1_imodifiers;
148 }
149
150 class VOP1_SDWAe op> : VOP_SDWAe {
151 bits<8> vdst;
152
153 let Inst{8-0} = 0xf9; // sdwa
154 let Inst{16-9} = op;
155 let Inst{24-17} = vdst;
156 let Inst{31-25} = 0x3f; // encoding
157 }
158
159 class VOP2_SDWAe op> : VOP_SDWAe {
160 bits<8> vdst;
161 bits<8> src1;
162
163 let Inst{8-0} = 0xf9; // sdwa
164 let Inst{16-9} = src1;
165 let Inst{24-17} = vdst;
166 let Inst{30-25} = op;
167 let Inst{31} = 0x0; // encoding
168 }
169
17013 class EXPe_vi : EXPe {
17114 let Inst{31-26} = 0x31; //encoding
17215 }
77 //===----------------------------------------------------------------------===//
88 // Instruction definitions for VI and newer.
99 //===----------------------------------------------------------------------===//
10
11 let SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI in {
12
13 let DisableSIDecoder = 1 in {
14
15 //===----------------------------------------------------------------------===//
16 // VOP1 Instructions
17 //===----------------------------------------------------------------------===//
18
19 defm V_CVT_F16_U16 : VOP1Inst , "v_cvt_f16_u16", VOP_F16_I16>;
20 defm V_CVT_F16_I16 : VOP1Inst , "v_cvt_f16_i16", VOP_F16_I16>;
21 defm V_CVT_U16_F16 : VOP1Inst , "v_cvt_u16_f16", VOP_I16_F16>;
22 defm V_CVT_I16_F16 : VOP1Inst , "v_cvt_i16_f16", VOP_I16_F16>;
23 defm V_RCP_F16 : VOP1Inst , "v_rcp_f16", VOP_F16_F16>;
24 defm V_SQRT_F16 : VOP1Inst , "v_sqrt_f16", VOP_F16_F16>;
25 defm V_RSQ_F16 : VOP1Inst , "v_rsq_f16", VOP_F16_F16>;
26 defm V_LOG_F16 : VOP1Inst , "v_log_f16", VOP_F16_F16>;
27 defm V_EXP_F16 : VOP1Inst , "v_exp_f16", VOP_F16_F16>;
28 defm V_FREXP_MANT_F16 : VOP1Inst , "v_frexp_mant_f16",
29 VOP_F16_F16
30 >;
31 defm V_FREXP_EXP_I16_F16 : VOP1Inst , "v_frexp_exp_i16_f16",
32 VOP_I16_F16
33 >;
34 defm V_FLOOR_F16 : VOP1Inst , "v_floor_f16", VOP_F16_F16>;
35 defm V_CEIL_F16 : VOP1Inst , "v_ceil_f16", VOP_F16_F16>;
36 defm V_TRUNC_F16 : VOP1Inst , "v_trunc_f16", VOP_F16_F16>;
37 defm V_RNDNE_F16 : VOP1Inst , "v_rndne_f16", VOP_F16_F16>;
38 defm V_FRACT_F16 : VOP1Inst , "v_fract_f16", VOP_F16_F16>;
39 defm V_SIN_F16 : VOP1Inst , "v_sin_f16", VOP_F16_F16>;
40 defm V_COS_F16 : VOP1Inst , "v_cos_f16", VOP_F16_F16>;
41
42 //===----------------------------------------------------------------------===//
43 // VOP2 Instructions
44 //===----------------------------------------------------------------------===//
45
46 let isCommutable = 1 in {
47
48 defm V_ADD_F16 : VOP2Inst , "v_add_f16", VOP_F16_F16_F16>;
49 defm V_SUB_F16 : VOP2Inst , "v_sub_f16", VOP_F16_F16_F16>;
50 defm V_SUBREV_F16 : VOP2Inst , "v_subrev_f16", VOP_F16_F16_F16,
51 null_frag, "v_sub_f16"
52 >;
53 defm V_MUL_F16 : VOP2Inst , "v_mul_f16", VOP_F16_F16_F16>;
54 defm V_MAC_F16 : VOP2Inst , "v_mac_f16", VOP_F16_F16_F16>;
55 } // End isCommutable = 1
56 defm V_MADMK_F16 : VOP2MADK , "v_madmk_f16", VOP_MADMK>;
57 let isCommutable = 1 in {
58 defm V_MADAK_F16 : VOP2MADK , "v_madak_f16", VOP_MADAK>;
59 defm V_ADD_U16 : VOP2Inst , "v_add_u16", VOP_I16_I16_I16>;
60 defm V_SUB_U16 : VOP2Inst , "v_sub_u16" , VOP_I16_I16_I16>;
61 defm V_SUBREV_U16 : VOP2Inst , "v_subrev_u16", VOP_I16_I16_I16>;
62 defm V_MUL_LO_U16 : VOP2Inst , "v_mul_lo_u16", VOP_I16_I16_I16>;
63 } // End isCommutable = 1
64 defm V_LSHLREV_B16 : VOP2Inst , "v_lshlrev_b16", VOP_I16_I16_I16>;
65 defm V_LSHRREV_B16 : VOP2Inst , "v_lshrrev_b16", VOP_I16_I16_I16>;
66 defm V_ASHRREV_B16 : VOP2Inst , "v_ashrrev_b16", VOP_I16_I16_I16>;
67 let isCommutable = 1 in {
68 defm V_MAX_F16 : VOP2Inst , "v_max_f16", VOP_F16_F16_F16>;
69 defm V_MIN_F16 : VOP2Inst , "v_min_f16", VOP_F16_F16_F16>;
70 defm V_MAX_U16 : VOP2Inst , "v_max_u16", VOP_I16_I16_I16>;
71 defm V_MAX_I16 : VOP2Inst , "v_max_i16", VOP_I16_I16_I16>;
72 defm V_MIN_U16 : VOP2Inst , "v_min_u16", VOP_I16_I16_I16>;
73 defm V_MIN_I16 : VOP2Inst , "v_min_i16", VOP_I16_I16_I16>;
74 } // End isCommutable = 1
75 defm V_LDEXP_F16 : VOP2Inst , "v_ldexp_f16", VOP_F16_F16_I16>;
76
77 } // let DisableSIDecoder = 1
78
79 // Aliases to simplify matching of floating-point instructions that
80 // are VOP2 on SI and VOP3 on VI.
81
82 class SI2_VI3Alias : InstAlias <
83 name#" $dst, $src0, $src1",
84 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
85 >, PredicateControl {
86 let UseInstAsmMatchConverter = 0;
87 let AsmVariantName = AMDGPUAsmVariants.VOP3;
88 }
89
90 def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
91 def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
92 def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
93 def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
94 def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
95
96 } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
97
98 let Predicates = [isVI] in {
99
100 //===----------------------------------------------------------------------===//
101 // DPP Patterns
102 //===----------------------------------------------------------------------===//
103
104 def : Pat <
105 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
106 imm:$bound_ctrl),
107 (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
108 (as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
109 >;
110
111 //===----------------------------------------------------------------------===//
112 // Misc Patterns
113 //===----------------------------------------------------------------------===//
114
115 } // End Predicates = [isVI]
0 //===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 //===----------------------------------------------------------------------===//
10 // VOP1 Classes
11 //===----------------------------------------------------------------------===//
12
13 class VOP1e op, VOPProfile P> : Enc32 {
14 bits<8> vdst;
15 bits<9> src0;
16
17 let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, 0);
18 let Inst{16-9} = op;
19 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
20 let Inst{31-25} = 0x3f; //encoding
21 }
22
23 class VOP1_Pseudo pattern=[]> :
24 InstSI ,
25 VOP ,
26 SIMCInstr ,
27 MnemonicAlias {
28
29 let isPseudo = 1;
30 let isCodeGenOnly = 1;
31 let UseNamedOperandTable = 1;
32
33 string Mnemonic = opName;
34 string AsmOperands = P.Asm32;
35
36 let Size = 4;
37 let mayLoad = 0;
38 let mayStore = 0;
39 let hasSideEffects = 0;
40 let SubtargetPredicate = isGCN;
41
42 let VOP1 = 1;
43 let VALU = 1;
44 let Uses = [EXEC];
45
46 let AsmVariantName = AMDGPUAsmVariants.Default;
47
48 VOPProfile Pfl = P;
49 }
50
51 class VOP1_Real :
52 InstSI ,
53 SIMCInstr {
54
55 let isPseudo = 0;
56 let isCodeGenOnly = 0;
57
58 // copy relevant pseudo op flags
59 let SubtargetPredicate = ps.SubtargetPredicate;
60 let AsmMatchConverter = ps.AsmMatchConverter;
61 let AsmVariantName = ps.AsmVariantName;
62 let Constraints = ps.Constraints;
63 let DisableEncoding = ps.DisableEncoding;
64 let TSFlags = ps.TSFlags;
65 }
66
67 class getVOP1Pat64 : LetDummies {
68 list ret = !if(P.HasModifiers,
69 [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
70 i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
71 [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
72 }
73
74 multiclass VOP1Inst
75 SDPatternOperator node = null_frag> {
76 def _e32 : VOP1_Pseudo ;
77 def _e64 : VOP3_Pseudo .ret>;
78 }
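// Usage sketch (opcode name hypothetical, not part of this change):
//   defm V_EXAMPLE_F32 : VOP1Inst <"v_example_f32", VOP_F32_F32, fsqrt>;
// expands to a V_EXAMPLE_F32_e32 VOP1 pseudo and a V_EXAMPLE_F32_e64 VOP3
// pseudo whose selection pattern comes from getVOP1Pat64.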
79
80 //===----------------------------------------------------------------------===//
81 // VOP1 Instructions
82 //===----------------------------------------------------------------------===//
83
84 let VOPAsmPrefer32Bit = 1 in {
85 defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
86 }
87
88 let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
89 defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
90 } // End isMoveImm = 1
91
92 // FIXME: Specify SchedRW for READFIRSTLANE_B32
93 // TODO: Make profile for this, there is VOP3 encoding also
94 def V_READFIRSTLANE_B32 :
95 InstSI <(outs SReg_32:$vdst),
96 (ins VGPR_32:$src0),
97 "v_readfirstlane_b32 $vdst, $src0",
98 [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
99 Enc32 {
100
101 let isCodeGenOnly = 0;
102 let UseNamedOperandTable = 1;
103
104 let Size = 4;
105 let mayLoad = 0;
106 let mayStore = 0;
107 let hasSideEffects = 0;
108 let SubtargetPredicate = isGCN;
109
110 let VOP1 = 1;
111 let VALU = 1;
112 let Uses = [EXEC];
113 let isConvergent = 1;
114
115 bits<8> vdst;
116 bits<9> src0;
117
118 let Inst{8-0} = src0;
119 let Inst{16-9} = 0x2;
120 let Inst{24-17} = vdst;
121 let Inst{31-25} = 0x3f; //encoding
122 }
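// Illustrative usage (not part of the original patch):
//   v_readfirstlane_b32 s4, v7
// copies the value of v7 from the first active lane (lane 0 if none are active)
// into s4, hence the SReg_32 destination above.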
123
124 let SchedRW = [WriteQuarterRate32] in {
125 defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
126 defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
127 defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
128 defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
129 defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
130 defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
131 defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>;
132 defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>;
133 defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
134 defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
135 defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
136 defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
137 defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
138 defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
139 defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
140 defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
141 defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
142 defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
143 defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
144 } // End SchedRW = [WriteQuarterRate32]
145
146 defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
147 defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
148 defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
149 defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
150 defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
151 defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
152
153 let SchedRW = [WriteQuarterRate32] in {
154 defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
155 defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
156 defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32>;
157 defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
158 } // End SchedRW = [WriteQuarterRate32]
159
160 let SchedRW = [WriteDouble] in {
161 defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
162 defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
163 } // End SchedRW = [WriteDouble];
164
165 defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;
166
167 let SchedRW = [WriteDouble] in {
168 defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
169 } // End SchedRW = [WriteDouble]
170
171 let SchedRW = [WriteQuarterRate32] in {
172 defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
173 defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
174 } // End SchedRW = [WriteQuarterRate32]
175
176 defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
177 defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>;
178 defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>;
179 defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
180 defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;
181 defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
182
183 let SchedRW = [WriteDoubleAdd] in {
184 defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
185 defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
186 } // End SchedRW = [WriteDoubleAdd]
187
188 defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
189 defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
190
191 let VOPAsmPrefer32Bit = 1 in {
192 defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT>;
193 }
194
195 // Restrict src0 to be VGPR
196 def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
197 let Src0RC32 = VRegSrc_32;
198 let Src0RC64 = VRegSrc_32;
199
200 let HasExt = 0;
201 }
202
203 // Special case because there are no true output operands. Hack vdst
204 // to be a src operand. The custom inserter must add a tied implicit
205 // def and use of the super register since there seems to be no way to
206 // add an implicit def of a virtual register in tablegen.
207 def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
208 let Src0RC32 = VOPDstOperand;
209 let Src0RC64 = VOPDstOperand;
210
211 let Outs = (outs);
212 let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
213 let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
214
215 let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
216 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
217 let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_modifiers, VCSrc_b32:$src0,
218 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
219 src0_sel:$src0_sel);
220
221 let Asm32 = getAsm32<1, 1>.ret;
222 let Asm64 = getAsm64<1, 1, 0>.ret;
223 let AsmDPP = getAsmDPP<1, 1, 0>.ret;
224 let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
225
226 let HasExt = 0;
227 let HasDst = 0;
228 let EmitDst = 1; // force vdst emission
229 }
230
231 let Uses = [M0, EXEC] in {
232 // v_movreld_b32 is a special case because the destination output
233 // register is really a source. It isn't actually read (but may be
234 // written), and is only to provide the base register to start
235 // indexing from. Tablegen seems to not let you define an implicit
236 // virtual register output for the super register being written into,
237 // so this must have an implicit def of the register added to it.
238 defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
239 defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
240 defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT>;
241 } // End Uses = [M0, EXEC]
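// Illustrative example (not part of the original patch): with M0 = 3,
//   v_movreld_b32 v8, v2   ; writes v11 (v8 indexed by M0) with the value of v2
//   v_movrels_b32 v0, v8   ; reads  v11 (v8 indexed by M0) into v0
// which is why the "destination" of v_movreld behaves as a base index rather
// than a normal def, as described in the comments above.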
242
243 // These instructions only exist on SI and CI
244 let SubtargetPredicate = isSICI in {
245
246 let SchedRW = [WriteQuarterRate32] in {
247 defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
248 defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
249 defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
250 defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
251 defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
252 defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
253 } // End SchedRW = [WriteQuarterRate32]
254
255 let SchedRW = [WriteDouble] in {
256 defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>;
257 defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
258 } // End SchedRW = [WriteDouble]
259
260 } // End SubtargetPredicate = isSICI
261
262
263 let SubtargetPredicate = isCIVI in {
264
265 let SchedRW = [WriteDoubleAdd] in {
266 defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>;
267 defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>;
268 defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>;
269 defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>;
270 } // End SchedRW = [WriteDoubleAdd]
271
272 let SchedRW = [WriteQuarterRate32] in {
273 defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>;
274 defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
275 } // End SchedRW = [WriteQuarterRate32]
276
277 } // End SubtargetPredicate = isCIVI
278
279
280 let SubtargetPredicate = isVI in {
281
282 defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16>;
283 defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16>;
284 defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16>;
285 defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16>;
286 defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16>;
287 defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16>;
288 defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16>;
289 defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16>;
290 defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16>;
291 defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16>;
292 defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16>;
293 defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16>;
294 defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16>;
295 defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16>;
296 defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16>;
297 defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16>;
298 defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16>;
299 defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16>;
300
301 }
302
303 //===----------------------------------------------------------------------===//
304 // Target
305 //===----------------------------------------------------------------------===//
306
307 //===----------------------------------------------------------------------===//
308 // SI
309 //===----------------------------------------------------------------------===//
310
311 multiclass VOP1_Real_si <bits<9> op> {
312 let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
313 def _e32_si :
314 VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>,
315 VOP1e(NAME#"_e32").Pfl>;
316 def _e64_si :
317 VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>,
318 VOP3e_si <{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>;
319 }
320 }
321
322 defm V_NOP : VOP1_Real_si <0x0>;
323 defm V_MOV_B32 : VOP1_Real_si <0x1>;
324 defm V_CVT_I32_F64 : VOP1_Real_si <0x3>;
325 defm V_CVT_F64_I32 : VOP1_Real_si <0x4>;
326 defm V_CVT_F32_I32 : VOP1_Real_si <0x5>;
327 defm V_CVT_F32_U32 : VOP1_Real_si <0x6>;
328 defm V_CVT_U32_F32 : VOP1_Real_si <0x7>;
329 defm V_CVT_I32_F32 : VOP1_Real_si <0x8>;
330 defm V_MOV_FED_B32 : VOP1_Real_si <0x9>;
331 defm V_CVT_F16_F32 : VOP1_Real_si <0xa>;
332 defm V_CVT_F32_F16 : VOP1_Real_si <0xb>;
333 defm V_CVT_RPI_I32_F32 : VOP1_Real_si <0xc>;
334 defm V_CVT_FLR_I32_F32 : VOP1_Real_si <0xd>;
335 defm V_CVT_OFF_F32_I4 : VOP1_Real_si <0xe>;
336 defm V_CVT_F32_F64 : VOP1_Real_si <0xf>;
337 defm V_CVT_F64_F32 : VOP1_Real_si <0x10>;
338 defm V_CVT_F32_UBYTE0 : VOP1_Real_si <0x11>;
339 defm V_CVT_F32_UBYTE1 : VOP1_Real_si <0x12>;
340 defm V_CVT_F32_UBYTE2 : VOP1_Real_si <0x13>;
341 defm V_CVT_F32_UBYTE3 : VOP1_Real_si <0x14>;
342 defm V_CVT_U32_F64 : VOP1_Real_si <0x15>;
343 defm V_CVT_F64_U32 : VOP1_Real_si <0x16>;
344 defm V_FRACT_F32 : VOP1_Real_si <0x20>;
345 defm V_TRUNC_F32 : VOP1_Real_si <0x21>;
346 defm V_CEIL_F32 : VOP1_Real_si <0x22>;
347 defm V_RNDNE_F32 : VOP1_Real_si <0x23>;
348 defm V_FLOOR_F32 : VOP1_Real_si <0x24>;
349 defm V_EXP_F32 : VOP1_Real_si <0x25>;
350 defm V_LOG_CLAMP_F32 : VOP1_Real_si <0x26>;
351 defm V_LOG_F32 : VOP1_Real_si <0x27>;
352 defm V_RCP_CLAMP_F32 : VOP1_Real_si <0x28>;
353 defm V_RCP_LEGACY_F32 : VOP1_Real_si <0x29>;
354 defm V_RCP_F32 : VOP1_Real_si <0x2a>;
355 defm V_RCP_IFLAG_F32 : VOP1_Real_si <0x2b>;
356 defm V_RSQ_CLAMP_F32 : VOP1_Real_si <0x2c>;
357 defm V_RSQ_LEGACY_F32 : VOP1_Real_si <0x2d>;
358 defm V_RSQ_F32 : VOP1_Real_si <0x2e>;
359 defm V_RCP_F64 : VOP1_Real_si <0x2f>;
360 defm V_RCP_CLAMP_F64 : VOP1_Real_si <0x30>;
361 defm V_RSQ_F64 : VOP1_Real_si <0x31>;
362 defm V_RSQ_CLAMP_F64 : VOP1_Real_si <0x32>;
363 defm V_SQRT_F32 : VOP1_Real_si <0x33>;
364 defm V_SQRT_F64 : VOP1_Real_si <0x34>;
365 defm V_SIN_F32 : VOP1_Real_si <0x35>;
366 defm V_COS_F32 : VOP1_Real_si <0x36>;
367 defm V_NOT_B32 : VOP1_Real_si <0x37>;
368 defm V_BFREV_B32 : VOP1_Real_si <0x38>;
369 defm V_FFBH_U32 : VOP1_Real_si <0x39>;
370 defm V_FFBL_B32 : VOP1_Real_si <0x3a>;
371 defm V_FFBH_I32 : VOP1_Real_si <0x3b>;
372 defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>;
373 defm V_FREXP_MANT_F64 : VOP1_Real_si <0x3d>;
374 defm V_FRACT_F64 : VOP1_Real_si <0x3e>;
375 defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>;
376 defm V_FREXP_MANT_F32 : VOP1_Real_si <0x40>;
377 defm V_CLREXCP : VOP1_Real_si <0x41>;
378 defm V_MOVRELD_B32 : VOP1_Real_si <0x42>;
379 defm V_MOVRELS_B32 : VOP1_Real_si <0x43>;
380 defm V_MOVRELSD_B32 : VOP1_Real_si <0x44>;
381
382 //===----------------------------------------------------------------------===//
383 // CI
384 //===----------------------------------------------------------------------===//
385
386 multiclass VOP1_Real_ci <bits<9> op> {
387 let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in {
388 def _e32_ci :
389 VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>,
390 VOP1e(NAME#"_e32").Pfl>;
391 def _e64_ci :
392 VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>,
393 VOP3e_si <{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>;
394 }
395 }
396
397 defm V_TRUNC_F64 : VOP1_Real_ci <0x17>;
398 defm V_CEIL_F64 : VOP1_Real_ci <0x18>;
399 defm V_FLOOR_F64 : VOP1_Real_ci <0x1A>;
400 defm V_RNDNE_F64 : VOP1_Real_ci <0x19>;
401 defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>;
402 defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;
403
404 //===----------------------------------------------------------------------===//
405 // VI
406 //===----------------------------------------------------------------------===//
407
408 class VOP1_SDWA <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
409 VOP_SDWA {
410 let Defs = ps.Defs;
411 let Uses = ps.Uses;
412 let SchedRW = ps.SchedRW;
413 let hasSideEffects = ps.hasSideEffects;
414 let AsmMatchConverter = "cvtSdwaVOP1";
415
416 bits<8> vdst;
417 let Inst{8-0} = 0xf9; // sdwa
418 let Inst{16-9} = op;
419 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
420 let Inst{31-25} = 0x3f; // encoding
421 }
422
423 class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
424 VOP_DPP {
425 let Defs = ps.Defs;
426 let Uses = ps.Uses;
427 let SchedRW = ps.SchedRW;
428 let hasSideEffects = ps.hasSideEffects;
429
430 bits<8> vdst;
431 let Inst{8-0} = 0xfa; // dpp
432 let Inst{16-9} = op;
433 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
434 let Inst{31-25} = 0x3f; //encoding
435 }
436
437 multiclass VOP1_Real_vi <bits<10> op> {
438 let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
439 def _e32_vi :
440 VOP1_Real(NAME#"_e32"), SIEncodingFamily.VI>,
441 VOP1e(NAME#"_e32").Pfl>;
442 def _e64_vi :
443 VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>,
444 VOP3e_vi (NAME#"_e64").Pfl>;
445 }
446
447 // For now, SDWA/DPP variants are provided only for asm/disasm.
448 // TODO: add corresponding pseudo
449 def _sdwa : VOP1_SDWA(NAME#"_e32")>;
450 def _dpp : VOP1_DPP(NAME#"_e32")>;
451 }
452
453 defm V_NOP : VOP1_Real_vi <0x0>;
454 defm V_MOV_B32 : VOP1_Real_vi <0x1>;
455 defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
456 defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
457 defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
458 defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
459 defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
460 defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
461 defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
462 defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
463 defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
464 defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
465 defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
466 defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
467 defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
468 defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
469 defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
470 defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
471 defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
472 defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
473 defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
474 defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
475 defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
476 defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
477 defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
478 defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
479 defm V_EXP_F32 : VOP1_Real_vi <0x20>;
480 defm V_LOG_F32 : VOP1_Real_vi <0x21>;
481 defm V_RCP_F32 : VOP1_Real_vi <0x22>;
482 defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
483 defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
484 defm V_RCP_F64 : VOP1_Real_vi <0x25>;
485 defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
486 defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
487 defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
488 defm V_SIN_F32 : VOP1_Real_vi <0x29>;
489 defm V_COS_F32 : VOP1_Real_vi <0x2a>;
490 defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
491 defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
492 defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
493 defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
494 defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
495 defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
496 defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
497 defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
498 defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
499 defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
500 defm V_CLREXCP : VOP1_Real_vi <0x35>;
501 defm V_MOVRELD_B32 : VOP1_Real_vi <0x36>;
502 defm V_MOVRELS_B32 : VOP1_Real_vi <0x37>;
503 defm V_MOVRELSD_B32 : VOP1_Real_vi <0x38>;
504 defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
505 defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
506 defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
507 defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
508 defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
509 defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
510 defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
511 defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
512 defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
513 defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
514 defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
515 defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
516 defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
517 defm V_LOG_F16 : VOP1_Real_vi <0x40>;
518 defm V_EXP_F16 : VOP1_Real_vi <0x41>;
519 defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
520 defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
521 defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
522 defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
523 defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
524 defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
525 defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
526 defm V_SIN_F16 : VOP1_Real_vi <0x49>;
527 defm V_COS_F16 : VOP1_Real_vi <0x4a>;
528
529 let Predicates = [isVI] in {
530
531 def : Pat <
532 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
533 imm:$bound_ctrl),
534 (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
535 (as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
536 >;
537
538 } // End Predicates = [isVI]
0 //===-- VOP2Instructions.td - Vector Instruction Definitions ---------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 //===----------------------------------------------------------------------===//
10 // VOP2 Classes
11 //===----------------------------------------------------------------------===//
12
13 class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
14 bits<8> vdst;
15 bits<9> src0;
16 bits<8> src1;
17
18 let Inst{8-0} = !if(P.HasSrc0, src0, 0);
19 let Inst{16-9} = !if(P.HasSrc1, src1, 0);
20 let Inst{24-17} = !if(P.EmitDst, vdst, 0);
21 let Inst{30-25} = op;
22 let Inst{31} = 0x0; //encoding
23 }
24
25 class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
26 bits<8> vdst;
27 bits<9> src0;
28 bits<8> src1;
29 bits<32> imm;
30
31 let Inst{8-0} = !if(P.HasSrc0, src0, 0);
32 let Inst{16-9} = !if(P.HasSrc1, src1, 0);
33 let Inst{24-17} = !if(P.EmitDst, vdst, 0);
34 let Inst{30-25} = op;
35 let Inst{31} = 0x0; // encoding
36 let Inst{63-32} = imm;
37 }
38
39 class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
40 InstSI ,
41 VOP ,
42 SIMCInstr ,
43 MnemonicAlias {
44
45 let isPseudo = 1;
46 let isCodeGenOnly = 1;
47 let UseNamedOperandTable = 1;
48
49 string Mnemonic = opName;
50 string AsmOperands = P.Asm32;
51
52 let Size = 4;
53 let mayLoad = 0;
54 let mayStore = 0;
55 let hasSideEffects = 0;
56 let SubtargetPredicate = isGCN;
57
58 let VOP2 = 1;
59 let VALU = 1;
60 let Uses = [EXEC];
61
62 let AsmVariantName = AMDGPUAsmVariants.Default;
63
64 VOPProfile Pfl = P;
65 }
66
67 class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
68 InstSI ,
69 SIMCInstr {
70
71 let isPseudo = 0;
72 let isCodeGenOnly = 0;
73
74 // copy relevant pseudo op flags
75 let SubtargetPredicate = ps.SubtargetPredicate;
76 let AsmMatchConverter = ps.AsmMatchConverter;
77 let AsmVariantName = ps.AsmVariantName;
78 let Constraints = ps.Constraints;
79 let DisableEncoding = ps.DisableEncoding;
80 let TSFlags = ps.TSFlags;
81 }
82
83 class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
84 list<dag> ret = !if(P.HasModifiers,
85 [(set P.DstVT:$vdst,
86 (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
87 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
88 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
89 }
90
91 multiclass VOP2Inst <string opName,
92 VOPProfile P,
93 SDPatternOperator node = null_frag,
94 string revOp = opName> {
95
96 def _e32 : VOP2_Pseudo ,
97 Commutable_REV;
98
99 def _e64 : VOP3_Pseudo .ret>,
100 Commutable_REV;
101 }
102
103 multiclass VOP2bInst <string opName,
104 VOPProfile P,
105 SDPatternOperator node = null_frag,
106 string revOp = opName,
107 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
108
109 let SchedRW = [Write32Bit, WriteSALU] in {
110 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
111 def _e32 : VOP2_Pseudo ,
112 Commutable_REV;
113 }
114 def _e64 : VOP3_Pseudo .ret>,
115 Commutable_REV;
116 }
117 }
118
119 multiclass VOP2eInst <string opName,
120 VOPProfile P,
121 SDPatternOperator node = null_frag,
122 string revOp = opName,
123 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
124
125 let SchedRW = [Write32Bit] in {
126 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
127 def _e32 : VOP2_Pseudo ,
128 Commutable_REV;
129 }
130 def _e64 : VOP3_Pseudo .ret>,
131 Commutable_REV;
132 }
133 }
134
135 def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
136 field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm);
137 field string Asm32 = "$vdst, $src0, $src1, $imm";
138 field bit HasExt = 0;
139 }
140
141 def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
142 field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1);
143 field string Asm32 = "$vdst, $src0, $imm, $src1";
144 field bit HasExt = 0;
145 }
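// Illustrative note (not part of the original patch): the two profiles differ only in
// where the 32-bit literal K sits in the operand list, e.g.
//   v_madak_f32 v0, v1, v2, 0x3f800000   ; v0 = v1 * v2 + 1.0
//   v_madmk_f32 v0, v1, 0x40000000, v2   ; v0 = v1 * 2.0 + v2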
146
147 def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
148 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
149 let Ins64 = getIns64, 3,
150 HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
151 let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
152 FP32InputMods:$src1_modifiers, Src1RC32:$src1,
153 VGPR_32:$src2, // stub argument
154 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
155 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
156 let InsSDWA = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
157 FP32InputMods:$src1_modifiers, Src1RC32:$src1,
158 VGPR_32:$src2, // stub argument
159 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
160 src0_sel:$src0_sel, src1_sel:$src1_sel);
161 let Asm32 = getAsm32<1, 2, f32>.ret;
162 let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
163 let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
164 let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
165 let HasSrc2 = 0;
166 let HasSrc2Mods = 0;
167 }
168
169 // Write out to vcc or arbitrary SGPR.
170 def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
171 let Asm32 = "$vdst, vcc, $src0, $src1";
172 let Asm64 = "$vdst, $sdst, $src0, $src1";
173 let Outs32 = (outs DstRC:$vdst);
174 let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
175 }
176
177 // Write out to vcc or arbitrary SGPR and read in from vcc or
178 // arbitrary SGPR.
179 def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
180 // We use VCSrc_b32 to exclude literal constants, even though the
181 // encoding normally allows them since the implicit VCC use means
182 // using one would always violate the constant bus
183 // restriction. SGPRs are still allowed because it should
184 // technically be possible to use VCC again as src0.
185 let Src0RC32 = VCSrc_b32;
186 let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
187 let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
188 let Outs32 = (outs DstRC:$vdst);
189 let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
190
191 // Suppress src2 implied by type since the 32-bit encoding uses an
192 // implicit VCC use.
193 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
194 }
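// Illustrative note (not part of the original patch): in the e32 form the implicit VCC
// read already occupies the one allowed constant-bus slot, e.g.
//   v_addc_u32_e32 v0, vcc, v1, v2, vcc   ; fine: both explicit sources are VGPRs
// so a literal in src0 would always be illegal, which is what VCSrc_b32 enforces
// while still allowing an SGPR (potentially VCC itself).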
195
196 // Read in from vcc or arbitrary SGPR
197 def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
198 let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
199 let Asm32 = "$vdst, $src0, $src1, vcc";
200 let Asm64 = "$vdst, $src0, $src1, $src2";
201 let Outs32 = (outs DstRC:$vdst);
202 let Outs64 = (outs DstRC:$vdst);
203
204 // Suppress src2 implied by type since the 32-bit encoding uses an
205 // implicit VCC use.
206 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
207 }
208
209 def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
210 let Outs32 = (outs SReg_32:$vdst);
211 let Outs64 = Outs32;
212 let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1);
213 let Ins64 = Ins32;
214 let Asm32 = " $vdst, $src0, $src1";
215 let Asm64 = Asm32;
216 }
217
218 def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
219 let Outs32 = (outs VGPR_32:$vdst);
220 let Outs64 = Outs32;
221 let Ins32 = (ins SReg_32:$src0, SCSrc_b32:$src1);
222 let Ins64 = Ins32;
223 let Asm32 = " $vdst, $src0, $src1";
224 let Asm64 = Asm32;
225 }
226
227 //===----------------------------------------------------------------------===//
228 // VOP2 Instructions
229 //===----------------------------------------------------------------------===//
230
231 let SubtargetPredicate = isGCN in {
232
233 defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
234 def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK>;
235
236 let isCommutable = 1 in {
237 defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
238 defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
239 defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
240 defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
241 defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
242 defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
243 defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
244 defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
245 defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
246 defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>;
247 defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>;
248 defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_I32_I32_I32>;
249 defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_I32_I32_I32>;
250 defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_I32_I32_I32>;
251 defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_I32_I32_I32>;
252 defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
253 defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
254 defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
255 defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_I32_I32_I32>;
256 defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_I32_I32_I32>;
257 defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_I32_I32_I32>;
258
259 let Constraints = "$vdst = $src2", DisableEncoding="$src2",
260 isConvertibleToThreeAddress = 1 in {
261 defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC>;
262 }
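// Illustrative note (not part of the original patch): the tied operand models the
// accumulator, e.g.
//   v_mac_f32 v0, v1, v2   ; v0 = v1 * v2 + v0
// so $src2 is the same register as $vdst and is not encoded separately.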
263
264 def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK>;
265
266 // No patterns so that the scalar instructions are always selected.
267 // The scalar versions will be replaced with vector when needed later.
268
269 // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
270 // but the VI instructions behave the same as the SI versions.
271 defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32>;
272 defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32>;
273 defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32">;
274 defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
275 defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
276 defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
277 } // End isCommutable = 1
278
279 // These are special and do not read the exec mask.
280 let isConvergent = 1, Uses = [] in {
281 def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
282 [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))], "">;
283
284 def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">;
285 } // End isConvergent = 1
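// Illustrative usage (not part of the original patch):
//   v_readlane_b32 s2, v0, s3   ; s2 = value of v0 in lane s3, ignoring EXEC
// which is why Uses is left empty for these two instructions.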
286
287 defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
288 defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>;
289 defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
290 defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
291 defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
292 defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst"
293 defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>;
294 defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>;
295 defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16>;
296 defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>;
297 defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>;
298
299 } // End SubtargetPredicate = isGCN
300
301
302 // These instructions only exist on SI and CI
303 let SubtargetPredicate = isSICI in {
304
305 defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
306 defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
307
308 let isCommutable = 1 in {
309 defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
310 defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
311 defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
312 defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
313 } // End isCommutable = 1
314
315 } // End SubtargetPredicate = isSICI
316
317 let SubtargetPredicate = isVI in {
318
319 def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK>;
320 defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
321 defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
322 defm V_ASHRREV_B16 : VOP2Inst <"v_ashrrev_b16", VOP_I16_I16_I16>;
323 defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I16>;
324
325 let isCommutable = 1 in {
326 defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16>;
327 defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16>;
328 defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
329 defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16>;
330 defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_F16_F16_F16>;
331 def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK>;
332 defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
333 defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
334 defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16>;
335 defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
336 defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16>;
337 defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16>;
338 defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
339 defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
340 defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
341 defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>;
342 } // End isCommutable = 1
343
344 } // End SubtargetPredicate = isVI
345
346 //===----------------------------------------------------------------------===//
347 // SI
348 //===----------------------------------------------------------------------===//
349
350 let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
351
352 multiclass VOP2_Real_si <bits<6> op> {
353 def _si :
354 VOP2_Real(NAME), SIEncodingFamily.SI>,
355 VOP2e(NAME).Pfl>;
356 }
357
358 multiclass VOP2_Real_MADK_si <bits<6> op> {
359 def _si : VOP2_Real(NAME), SIEncodingFamily.SI>,
360 VOP2_MADKe(NAME).Pfl>;
361 }
362
363 multiclass VOP2_Real_e32_si <bits<6> op> {
364 def _e32_si :
365 VOP2_Real(NAME#"_e32"), SIEncodingFamily.SI>,
366 VOP2e(NAME#"_e32").Pfl>;
367 }
368
369 multiclass VOP2_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
370 def _e64_si :
371 VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>,
372 VOP3e_si <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>;
373 }
374
375 multiclass VOP2be_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
376 def _e64_si :
377 VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>,
378 VOP3be_si <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>;
379 }
380
381 } // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
382
383 defm V_CNDMASK_B32 : VOP2_Real_e32e64_si <0x0>;
384 defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>;
385 defm V_SUB_F32 : VOP2_Real_e32e64_si <0x4>;
386 defm V_SUBREV_F32 : VOP2_Real_e32e64_si <0x5>;
387 defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_si <0x7>;
388 defm V_MUL_F32 : VOP2_Real_e32e64_si <0x8>;
389 defm V_MUL_I32_I24 : VOP2_Real_e32e64_si <0x9>;
390 defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_si <0xa>;
391 defm V_MUL_U32_U24 : VOP2_Real_e32e64_si <0xb>;
392 defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_si <0xc>;
393 defm V_MIN_F32 : VOP2_Real_e32e64_si <0xf>;
394 defm V_MAX_F32 : VOP2_Real_e32e64_si <0x10>;
395 defm V_MIN_I32 : VOP2_Real_e32e64_si <0x11>;
396 defm V_MAX_I32 : VOP2_Real_e32e64_si <0x12>;
397 defm V_MIN_U32 : VOP2_Real_e32e64_si <0x13>;
398 defm V_MAX_U32 : VOP2_Real_e32e64_si <0x14>;
399 defm V_LSHRREV_B32 : VOP2_Real_e32e64_si <0x16>;
400 defm V_ASHRREV_I32 : VOP2_Real_e32e64_si <0x18>;
401 defm V_LSHLREV_B32 : VOP2_Real_e32e64_si <0x1a>;
402 defm V_AND_B32 : VOP2_Real_e32e64_si <0x1b>;
403 defm V_OR_B32 : VOP2_Real_e32e64_si <0x1c>;
404 defm V_XOR_B32 : VOP2_Real_e32e64_si <0x1d>;
405 defm V_MAC_F32 : VOP2_Real_e32e64_si <0x1f>;
406 defm V_MADMK_F32 : VOP2_Real_MADK_si <0x20>;
407 defm V_MADAK_F32 : VOP2_Real_MADK_si <0x21>;
408 defm V_ADD_I32 : VOP2be_Real_e32e64_si <0x25>;
409 defm V_SUB_I32 : VOP2be_Real_e32e64_si <0x26>;
410 defm V_SUBREV_I32 : VOP2be_Real_e32e64_si <0x27>;
411 defm V_ADDC_U32 : VOP2be_Real_e32e64_si <0x28>;
412 defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>;
413 defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>;
414
415 defm V_READLANE_B32 : VOP2_Real_si <0x01>;
416 defm V_WRITELANE_B32 : VOP2_Real_si <0x02>;
417
418 defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>;
419 defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>;
420 defm V_MAX_LEGACY_F32 : VOP2_Real_e32e64_si <0xe>;
421 defm V_LSHR_B32 : VOP2_Real_e32e64_si <0x15>;
422 defm V_ASHR_I32 : VOP2_Real_e32e64_si <0x17>;
423 defm V_LSHL_B32 : VOP2_Real_e32e64_si <0x19>;
424
425 defm V_BFM_B32 : VOP2_Real_e32e64_si <0x1e>;
426 defm V_BCNT_U32_B32 : VOP2_Real_e32e64_si <0x22>;
427 defm V_MBCNT_LO_U32_B32 : VOP2_Real_e32e64_si <0x23>;
428 defm V_MBCNT_HI_U32_B32 : VOP2_Real_e32e64_si <0x24>;
429 defm V_LDEXP_F32 : VOP2_Real_e32e64_si <0x2b>;
430 defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e32e64_si <0x2c>;
431 defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e32e64_si <0x2d>;
432 defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e32e64_si <0x2e>;
433 defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e32e64_si <0x2f>;
434 defm V_CVT_PK_U16_U32 : VOP2_Real_e32e64_si <0x30>;
435 defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;
436
437
438 //===----------------------------------------------------------------------===//
439 // VI
440 //===----------------------------------------------------------------------===//
441
442 class VOP2_SDWA <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
443 VOP_SDWA {
444 let Defs = ps.Defs;
445 let Uses = ps.Uses;
446 let SchedRW = ps.SchedRW;
447 let hasSideEffects = ps.hasSideEffects;
448 let AsmMatchConverter = "cvtSdwaVOP2";
449
450 bits<8> vdst;
451 bits<8> src1;
452 let Inst{8-0} = 0xf9; // sdwa
453 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
454 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
455 let Inst{30-25} = op;
456 let Inst{31} = 0x0; // encoding
457 }
458
459 class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
460 VOP_DPP {
461 let Defs = ps.Defs;
462 let Uses = ps.Uses;
463 let SchedRW = ps.SchedRW;
464 let hasSideEffects = ps.hasSideEffects;
465
466 bits<8> vdst;
467 bits<8> src1;
468 let Inst{8-0} = 0xfa; //dpp
469 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
470 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
471 let Inst{30-25} = op;
472 let Inst{31} = 0x0; //encoding
473 }
474
475 let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
476
477 multiclass VOP32_Real_vi <bits<10> op> {
478 def _vi :
479 VOP2_Real(NAME), SIEncodingFamily.VI>,
480 VOP3e_vi(NAME).Pfl>;
481 }
482
483 multiclass VOP2_Real_MADK_vi <bits<6> op> {
484 def _vi : VOP2_Real(NAME), SIEncodingFamily.VI>,
485 VOP2_MADKe(NAME).Pfl>;
486 }
487
488 multiclass VOP2_Real_e32_vi <bits<6> op> {
489 def _e32_vi :
490 VOP2_Real(NAME#"_e32"), SIEncodingFamily.VI>,
491 VOP2e(NAME#"_e32").Pfl>;
492 }
493
494 multiclass VOP2_Real_e64_vi <bits<10> op> {
495 def _e64_vi :
496 VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>,
497 VOP3e_vi (NAME#"_e64").Pfl>;
498 }
499
500 multiclass VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
501 def _e64_vi :
502 VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>,
503 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>;
504 }
505
506 multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
507 VOP2_Real_e32_vi<op>,
508 VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
509
510 } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
511
512 multiclass VOP2_Real_e32e64_vi <bits<6> op> :
513 Base_VOP2_Real_e32e64_vi<op> {
514 // For now, SDWA/DPP variants are provided only for asm/disasm.
515 // TODO: add corresponding pseudo
516 def _sdwa : VOP2_SDWA(NAME#"_e32")>;
517 def _dpp : VOP2_DPP(NAME#"_e32")>;
518 }
519
520 defm V_CNDMASK_B32 : Base_VOP2_Real_e32e64_vi <0x0>;
521 defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
522 defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
523 defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;
524 defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
525 defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>;
526 defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>;
527 defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>;
528 defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>;
529 defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>;
530 defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>;
531 defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>;
532 defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>;
533 defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>;
534 defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>;
535 defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>;
536 defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>;
537 defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>;
538 defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>;
539 defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>;
540 defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>;
541 defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>;
542 defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
543 defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
544 defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
545 defm V_ADD_I32 : VOP2be_Real_e32e64_vi <0x19>;
546 defm V_SUB_I32 : VOP2be_Real_e32e64_vi <0x1a>;
547 defm V_SUBREV_I32 : VOP2be_Real_e32e64_vi <0x1b>;
548 defm V_ADDC_U32 : VOP2be_Real_e32e64_vi <0x1c>;
549 defm V_SUBB_U32 : VOP2be_Real_e32e64_vi <0x1d>;
550 defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>;
551
552 defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
553 defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
554
555 defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>;
556 defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>;
557 defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>;
558 defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>;
559 defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>;
560 defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>;
561 defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>;
562 defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>;
563 defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>;
564 defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>;
565 defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>;
566
567 defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>;
568 defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>;
569 defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>;
570 defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>;
571 defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>;
572 defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>;
573 defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>;
574 defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>;
575 defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>;
576 defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>;
577 defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>;
578 defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>;
579 defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>;
580 defm V_ASHRREV_B16 : VOP2_Real_e32e64_vi <0x2c>;
581 defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>;
582 defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>;
583 defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>;
584 defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>;
585 defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>;
586 defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>;
587 defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>;
588
589 let SubtargetPredicate = isVI in {
590
591 // Aliases to simplify matching of floating-point instructions that
592 // are VOP2 on SI and VOP3 on VI.
593 class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
594 name#" $dst, $src0, $src1",
595 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
596 >, PredicateControl {
597 let UseInstAsmMatchConverter = 0;
598 let AsmVariantName = AMDGPUAsmVariants.VOP3;
599 }
600
601 def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
602 def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
603 def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
604 def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
605 def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
606
607 } // End SubtargetPredicate = isVI
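// Illustrative note (not part of the original patch): the aliases above let SI-style
// VOP2 syntax such as
//   v_ldexp_f32 v1, v2, v3
// match the VI VOP3-only encoding (V_LDEXP_F32_e64_vi) with all modifier, clamp and
// omod fields zeroed, as spelled out by the (inst ... 0, ..., 0, ..., 0, 0) result
// of the alias.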
3838 }
3939
4040 class VOP3Inst :
41 VOP3_PseudoNew
41 VOP3_Pseudo
4242 !if(P.HasModifiers, getVOP3ModPat.ret, getVOP3Pat.ret),
4343 VOP3Only>;
4444
117117 // if (vcc)
118118 // result *= 2^32
119119 //
120 def V_DIV_FMAS_F32 : VOP3_PseudoNew <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
120 def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
121121 getVOP3VCC.ret> {
122122 let SchedRW = [WriteFloatFMA];
123123 }
126126 // if (vcc)
127127 // result *= 2^64
128128 //
129 def V_DIV_FMAS_F64 : VOP3_PseudoNew <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
129 def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
130130 getVOP3VCC.ret> {
131131 let SchedRW = [WriteDouble];
132132 }
164164 def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile, AMDGPUldexp, 1>;
165165 } // End SchedRW = [WriteDoubleAdd]
166166
167 def V_DIV_SCALE_F32 : VOP3_PseudoNew <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
167 def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
168168 let SchedRW = [WriteFloatFMA, WriteSALU];
169169 }
170170
171171 // Double precision division pre-scale.
172 def V_DIV_SCALE_F64 : VOP3_PseudoNew <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
172 def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
173173 let SchedRW = [WriteDouble, WriteSALU];
174174 }
175175
233233 let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
234234
235235 multiclass VOP3_Real_si op> {
236 def _si : VOP3_Real(NAME), SIEncodingFamily.SI>,
237 VOP3e_siNew (NAME).Pfl>;
236 def _si : VOP3_Real(NAME), SIEncodingFamily.SI>,
237 VOP3e_si (NAME).Pfl>;
238238 }
239239
240240 multiclass VOP3be_Real_si op> {
241 def _si : VOP3_Real(NAME), SIEncodingFamily.SI>,
242 VOP3be_siNew (NAME).Pfl>;
241 def _si : VOP3_Real(NAME), SIEncodingFamily.SI>,
242 VOP3be_si (NAME).Pfl>;
243243 }
244244
245245 } // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
302302 //===----------------------------------------------------------------------===//
303303
304304 multiclass VOP3_Real_ci op> {
305 def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>,
306 VOP3e_siNew (NAME).Pfl> {
305 def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>,
306 VOP3e_si (NAME).Pfl> {
307307 let AssemblerPredicates = [isCIOnly];
308308 let DecoderNamespace = "CI";
309309 }
322322 let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
323323
324324 multiclass VOP3_Real_vi op> {
325 def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>,
326 VOP3e_viNew (NAME).Pfl>;
325 def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>,
326 VOP3e_vi (NAME).Pfl>;
327327 }
328328
329329 multiclass VOP3be_Real_vi op> {
330 def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>,
331 VOP3be_viNew (NAME).Pfl>;
330 def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>,
331 VOP3be_vi (NAME).Pfl>;
332332 }
333333
334334 } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
7979 }
8080
8181 // This class is used only with VOPC instructions. Use $sdst for out operand
82 class VOPCInstAlias <VOP3_PseudoNew ps, Instruction inst, VOPProfile p = ps.Pfl> :
82 class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
8383 InstAlias , PredicateControl {
8484
8585 field bit isCompare;
127127 let isCompare = 1;
128128 let isCommutable = 1;
129129 }
130 def _e64 : VOP3_PseudoNew
130 def _e64 : VOP3_Pseudo
131131 !if(P.HasModifiers,
132132 [(set i1:$sdst,
133133 (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
397397 VOPC_Profile {
398398 let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
399399 let Asm64 = "$sdst, $src0_modifiers, $src1";
400 let InsSDWA = (ins Src0Mod:$src0_fmodifiers, Src0RC64:$src0,
401 Int32InputMods:$src1_imodifiers, Src1RC64:$src1,
400 let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0,
401 Int32InputMods:$src1_modifiers, Src1RC64:$src1,
402402 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
403 let AsmSDWA = " vcc, $src0_fmodifiers, $src1_imodifiers$clamp $src0_sel $src1_sel";
403 let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
404 let HasSrc1Mods = 0;
404405 let HasClamp = 0;
405406 let HasOMod = 0;
406407 }
421422 let SchedRW = p.Schedule;
422423 let isConvergent = DefExec;
423424 }
424 def _e64 : VOP3_PseudoNew.ret> {
425 def _e64 : VOP3_Pseudo.ret> {
425426 let Defs = !if(DefExec, [EXEC], []);
426427 let SchedRW = p.Schedule;
427428 }
532533 VOPCe;
533534
534535 def _e64_si :
535 VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>,
536 VOP3a_siNew (NAME#"_e64").Pfl> {
536 VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>,
537 VOP3a_si (NAME#"_e64").Pfl> {
537538 // Encoding used for VOPC instructions encoded as VOP3
538539 // Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
539540 bits<8> sdst;
540541 let Inst{7-0} = sdst;
541542 }
542543 }
543 def : VOPCInstAlias <!cast<VOP3_PseudoNew>(NAME#"_e64"),
544 def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
544545 !cast(NAME#"_e32_si")> {
545546 let AssemblerPredicate = isSICI;
546547 }
763764 // VI
764765 //===----------------------------------------------------------------------===//
765766
766 class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAeNew <P> {
767 class VOPC_SDWA <bits<8> op, VOPC_Pseudo ps, VOPProfile P = ps.Pfl> :
768 VOP_SDWA {
769 let Defs = ps.Defs;
770 let hasSideEffects = ps.hasSideEffects;
771 let AsmMatchConverter = "cvtSdwaVOPC";
772 let isCompare = ps.isCompare;
773 let isCommutable = ps.isCommutable;
774
767775 bits<8> src1;
768
769776 let Inst{8-0} = 0xf9; // sdwa
770777 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
771778 let Inst{24-17} = op;
774781 // VOPC disallows dst_sel and dst_unused as they have no effect on destination
775782 let Inst{42-40} = SDWA_DWORD;
776783 let Inst{44-43} = SDWA_UNUSED_PRESERVE;
777 }
778
779 class VOPC_SDWA <bits<8> op, VOPC_Pseudo ps, VOPProfile p = ps.Pfl> :
780 VOP_SDWA ,
781 VOPC_SDWAe {
782 let Defs = ps.Defs;
783 let hasSideEffects = ps.hasSideEffects;
784 let AsmMatchConverter = "cvtSdwaVOPC";
785 let SubtargetPredicate = isVI;
786 let AssemblerPredicate = !if(p.HasExt, isVI, DisableInst);
787 let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA,
788 AMDGPUAsmVariants.Disable);
789 let DecoderNamespace = "SDWA";
790 let isCompare = ps.isCompare;
791 let isCommutable = ps.isCommutable;
792784 }
793785
794786 multiclass VOPC_Real_vi op> {
798790 VOPCe;
799791
800792 def _e64_vi :
801 VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>,
802 VOP3a_viNew (NAME#"_e64").Pfl> {
793 VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>,
794 VOP3a_vi (NAME#"_e64").Pfl> {
803795 // Encoding used for VOPC instructions encoded as VOP3
804796 // Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
805797 bits<8> sdst;
811803 // TODO: add corresponding pseudo
812804 def _sdwa : VOPC_SDWA(NAME#"_e32")>;
813805
814 def : VOPCInstAlias <!cast<VOP3_PseudoNew>(NAME#"_e64"),
806 def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
815807 !cast(NAME#"_e32_vi")> {
816808 let AssemblerPredicate = isVI;
817809 }
66 //
77 //===----------------------------------------------------------------------===//
88
9 class VOP3_PseudoNew <string opName, VOPProfile P, list<dag> pattern, bit VOP3Only = 0> :
10 InstSI ,
11 VOP ,
12 SIMCInstr,
13 MnemonicAlias {
14
15 let isPseudo = 1;
16 let isCodeGenOnly = 1;
17 let UseNamedOperandTable = 1;
18
19 string Mnemonic = opName;
20 string AsmOperands = P.Asm64;
21
22 let Size = 8;
23 let mayLoad = 0;
24 let mayStore = 0;
25 let hasSideEffects = 0;
26 let SubtargetPredicate = isGCN;
27
28 // Because SGPRs may be allowed if there are multiple operands, we
29 // need a post-isel hook to insert copies in order to avoid
30 // violating constant bus requirements.
31 let hasPostISelHook = 1;
9 // dummies for outer let
10 class LetDummies {
11 bit isCommutable;
12 bit isConvertibleToThreeAddress;
13 bit isMoveImm;
14 bit isReMaterializable;
15 bit isAsCheapAsAMove;
16 bit VOPAsmPrefer32Bit;
17 Predicate SubtargetPredicate;
18 string Constraints;
19 string DisableEncoding;
20 list SchedRW;
21 list Uses;
22 list Defs;
23 }
24
25 class VOP {
26 string OpName = opName;
27 }
28
29 class VOPAnyCommon pattern> :
30 InstSI {
31
32 let mayLoad = 0;
33 let mayStore = 0;
34 let hasSideEffects = 0;
35 let UseNamedOperandTable = 1;
36 let VALU = 1;
37 }
38
39 class VOP3Common
40 list pattern = [], bit HasMods = 0,
41 bit VOP3Only = 0> :
42 VOPAnyCommon {
3243
3344 // Using complex patterns gives VOP3 patterns a very high complexity rating,
3445 // but standalone patterns are almost always preferred, so we need to adjust the
4051 let VALU = 1;
4152 let Uses = [EXEC];
4253
54 let AsmMatchConverter =
55 !if(!eq(VOP3Only,1),
56 "cvtVOP3",
57 !if(!eq(HasMods,1), "cvtVOP3_2_mod", ""));
58
59 let AsmVariantName = AMDGPUAsmVariants.VOP3;
60
61 let isCodeGenOnly = 0;
62
63 int Size = 8;
64
65 // Because SGPRs may be allowed if there are multiple operands, we
66 // need a post-isel hook to insert copies in order to avoid
67 // violating constant bus requirements.
68 let hasPostISelHook = 1;
69 }
70
71
72 class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3Only = 0> :
73 InstSI ,
74 VOP ,
75 SIMCInstr,
76 MnemonicAlias {
77
78 let isPseudo = 1;
79 let isCodeGenOnly = 1;
80 let UseNamedOperandTable = 1;
81
82 string Mnemonic = opName;
83 string AsmOperands = P.Asm64;
84
85 let Size = 8;
86 let mayLoad = 0;
87 let mayStore = 0;
88 let hasSideEffects = 0;
89 let SubtargetPredicate = isGCN;
90
91 // Because SGPRs may be allowed if there are multiple operands, we
92 // need a post-isel hook to insert copies in order to avoid
93 // violating constant bus requirements.
94 let hasPostISelHook = 1;
95
96 // Using complex patterns gives VOP3 patterns a very high complexity rating,
97 // but standalone patterns are almost always preferred, so we need to adjust the
98 // priority lower. The goal is to use a high number to reduce complexity to
99 // zero (or less than zero).
100 let AddedComplexity = -1000;
101
102 let VOP3 = 1;
103 let VALU = 1;
104 let Uses = [EXEC];
105
43106 let AsmVariantName = AMDGPUAsmVariants.VOP3;
44107 let AsmMatchConverter =
45108 !if(!eq(VOP3Only,1),
49112 VOPProfile Pfl = P;
50113 }
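// Illustrative note (not part of the original patch): the constant-bus rule means a
// VALU op may read at most one SGPR or literal, so a selected form like
//   v_add_f32_e64 v0, s0, s1
// is illegal until the post-isel hook copies one scalar operand into a VGPR.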
51114
52 class VOP3_Real <VOP3_PseudoNew ps, int EncodingFamily> :
115 class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
53116 InstSI ,
54117 SIMCInstr {
55118
65128 let TSFlags = ps.TSFlags;
66129 }
67130
68 class VOP3aNew <VOPProfile P> : Enc64 {
131 class VOP3a <VOPProfile P> : Enc64 {
69132 bits<2> src0_modifiers;
70133 bits<9> src0;
71134 bits<2> src1_modifiers;
80143 let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
81144
82145 let Inst{31-26} = 0x34; //encoding
83 let Inst{40-32} = src0;
146 let Inst{40-32} = !if(P.HasSrc0, src0, 0);
84147 let Inst{49-41} = !if(P.HasSrc1, src1, 0);
85148 let Inst{58-50} = !if(P.HasSrc2, src2, 0);
86149 let Inst{60-59} = !if(P.HasOMod, omod, 0);
89152 let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
90153 }
91154
92 class VOP3a_siNew <bits<9> op, VOPProfile P> : VOP3aNew <P> {
155 class VOP3a_si <bits<9> op, VOPProfile P> : VOP3a <P> {
93156 let Inst{25-17} = op;
94 let Inst{11} = !if(P.HasClamp, clamp, 0);
95 }
96
97 class VOP3a_viNew <bits<10> op, VOPProfile P> : VOP3aNew <P> {
157 let Inst{11} = !if(P.HasClamp, clamp{0}, 0);
158 }
159
160 class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a <P> {
98161 let Inst{25-16} = op;
99 let Inst{15} = !if(P.HasClamp, clamp, 0);
100 }
101
102 class VOP3e_siNew op, VOPProfile P> : VOP3a_siNew {
162 let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
163 }
164
165 class VOP3e_si op, VOPProfile P> : VOP3a_si {
103166 bits<8> vdst;
104 let Inst{7-0} = vdst;
105 }
106
107 class VOP3e_viNew op, VOPProfile P> : VOP3a_viNew {
167 let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
168 }
169
170 class VOP3e_vi op, VOPProfile P> : VOP3a_vi {
108171 bits<8> vdst;
109 let Inst{7-0} = vdst;
110 }
111
112 class VOP3beNew <VOPProfile P> : Enc64 {
172 let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
173 }
174
175 class VOP3be <VOPProfile P> : Enc64 {
113176 bits<8> vdst;
114177 bits<2> src0_modifiers;
115178 bits<9> src0;
132195 let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
133196 }
134197
135 class VOP3be_siNew <bits<9> op, VOPProfile P> : VOP3beNew <P> {
198 class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be <P> {
136199 let Inst{25-17} = op;
137200 }
138201
139 class VOP3be_viNew <bits<10> op, VOPProfile P> : VOP3beNew <P> {
202 class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be <P> {
140203 bits<1> clamp;
141204 let Inst{25-16} = op;
142 let Inst{15} = !if(P.HasClamp, clamp, 0);
143 }
144
145 class VOP_SDWAeNew <VOPProfile P> : Enc64 {
205 let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
206 }
207
208 class VOP_SDWAe <VOPProfile P> : Enc64 {
146209 bits<8> src0;
147210 bits<3> src0_sel;
148 bits<2> src0_fmodifiers; // {abs,neg}
149 bits<1> src0_imodifiers; // sext
211 bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
150212 bits<3> src1_sel;
151 bits<2> src1_fmodifiers;
152 bits<1> src1_imodifiers;
213 bits<2> src1_modifiers;
153214 bits<3> dst_sel;
154215 bits<2> dst_unused;
155216 bits<1> clamp;
158219 bits<2> SDWA_UNUSED_PRESERVE = 2;
159220
160221 let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
161 let Inst{42-40} = !if(P.HasDst, dst_sel{2-0}, SDWA_DWORD{2-0});
162 let Inst{44-43} = !if(P.HasDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0});
222 let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA_DWORD{2-0});
223 let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0});
163224 let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0);
164225 let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA_DWORD{2-0});
165 let Inst{53-52} = !if(P.HasSrc0Mods, src0_fmodifiers{1-0}, 0);
166 let Inst{51} = !if(P.HasSrc0IntMods, src0_imodifiers{0}, 0);
226 let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
227 let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
167228 let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA_DWORD{2-0});
168 let Inst{61-60} = !if(P.HasSrc1Mods, src1_fmodifiers{1-0}, 0);
169 let Inst{59} = !if(P.HasSrc1IntMods, src1_imodifiers{0}, 0);
229 let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
230 let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
231 }
232
233 class VOP_SDWA :
234 InstSI ,
235 VOP_SDWAe <P> {
236 let mayLoad = 0;
237 let mayStore = 0;
238 let hasSideEffects = 0;
239 let UseNamedOperandTable = 1;
240 let VALU = 1;
241 let SDWA = 1;
242 let Size = 8;
243
244 let SubtargetPredicate = isVI;
245 let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
246 let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
247 AMDGPUAsmVariants.Disable);
248 let DecoderNamespace = "SDWA";
249 }
250
251 class VOP_DPPe <VOPProfile P> : Enc64 {
252 bits<2> src0_modifiers;
253 bits<8> src0;
254 bits<2> src1_modifiers;
255 bits<9> dpp_ctrl;
256 bits<1> bound_ctrl;
257 bits<4> bank_mask;
258 bits<4> row_mask;
259
260 let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
261 let Inst{48-40} = dpp_ctrl;
262 let Inst{51} = bound_ctrl;
263 let Inst{52} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg
264 let Inst{53} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs
265 let Inst{54} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // src1_neg
266 let Inst{55} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // src1_abs
267 let Inst{59-56} = bank_mask;
268 let Inst{63-60} = row_mask;
269 }
270
271 class VOP_DPP :
272 InstSI ,
273 VOP_DPPe <P> {
274
275 let mayLoad = 0;
276 let mayStore = 0;
277 let hasSideEffects = 0;
278 let UseNamedOperandTable = 1;
279
280 let VALU = 1;
281 let DPP = 1;
282 let Size = 8;
283
284 let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
285 let SubtargetPredicate = isVI;
286 let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
287 let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
288 AMDGPUAsmVariants.Disable);
289 let DecoderNamespace = "DPP";
170290 }
171291
172292 include "VOPCInstructions.td"
293 include "VOP1Instructions.td"
294 include "VOP2Instructions.td"
173295 include "VOP3Instructions.td"