llvm.org GIT mirror llvm / cb5431a
[AMDGPU][MC] Fix for Bug 28207 + LIT tests Enabled clamp and omod for v_cvt_* opcodes which have src0 of an integer type Reviewers: vpykhtin, arsenm Differential Revision: https://reviews.llvm.org/D31327 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298852 91177308-0d34-0410-b5e6-96231b3b80d8 Dmitry Preobrazhensky 3 years ago
6 changed file(s) with 226 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
161161 SDValue &Clamp,
162162 SDValue &Omod) const;
163163
// Complex-pattern selector that yields a source plus clamp and omod
// operands. Unlike the neighbouring SelectVOP3PMods* selectors it has no
// SrcMods output parameter.
164 bool SelectVOP3OMods(SDValue In, SDValue &Src,
165 SDValue &Clamp, SDValue &Omod) const;
166
164167 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
165168 bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
166169 SDValue &Clamp) const;
16681671 return SelectVOP3Mods(In, Src, SrcMods);
16691672 }
16701673
// Selector for a source operand accompanied by clamp and omod operands but
// no source-modifier operand.
//
// In    - the incoming value; forwarded unchanged as Src.
// Src   - out: set to In.
// Clamp - out: set to an i32 target constant 0.
// Omod  - out: set to an i32 target constant 0.
//
// Always returns true, so selection through this function never fails.
1674 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1675 SDValue &Clamp, SDValue &Omod) const {
1676 Src = In;
1677
1678 SDLoc DL(In);
1679 // FIXME: Handle Clamp and Omod
// Until the FIXME above is resolved, both output modifiers are always 0.
// NOTE(review): the getVOP1Pat64 pattern in this change types $clamp as i1,
// while the constant below is created as MVT::i32 - confirm this is intended.
1680 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1681 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1682
1683 return true;
1684 }
1685
16711686 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
16721687 SDValue &SrcMods) const {
16731688 unsigned Mods = 0;
10171017
10181018 void cvtId(MCInst &Inst, const OperandVector &Operands);
10191019 void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
// Dispatches to cvtVOP3OMod when the opcode has the VOP3 TSFlag set,
// otherwise to cvtId.
1020 void cvtVOP3_omod(MCInst &Inst, const OperandVector &Operands);
10201021
10211022 void cvtVOP3Impl(MCInst &Inst,
10221023 const OperandVector &Operands,
10231024 OptionalImmIndexMap &OptionalIdx);
10241025 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
// Converter for VOP3 forms that accept the optional clamp/omod operands
// but no source modifiers.
1026 void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands);
10251027 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
10261028
10271029 void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
36773679 }
36783680 }
36793681
// Converts parsed operands into Inst, choosing the conversion routine from
// the opcode's TSFlags: opcodes flagged SIInstrFlags::VOP3 go through
// cvtVOP3OMod, everything else through cvtId.
//
// NOTE(review): the AsmMatchConverter selection visible in this change names
// "cvtVOP3OMod" directly rather than this wrapper - confirm whether
// cvtVOP3_omod is referenced anywhere.
3682 void AMDGPUAsmParser::cvtVOP3_omod(MCInst &Inst, const OperandVector &Operands) {
3683 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3684 if (TSFlags & SIInstrFlags::VOP3) {
3685 cvtVOP3OMod(Inst, Operands);
3686 } else {
3687 cvtId(Inst, Operands);
3688 }
3689 }
3690
36803691 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
36813692 // 1. This operand is input modifiers
36823693 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
37343745 ++it;
37353746 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
37363747 }
3748 }
3749
// Converts parsed operands into Inst for VOP3 forms that take the optional
// clamp and omod operands.
//
// Steps:
//   1. Add one register operand per def declared by the instruction
//      description, reading parsed operands from index 1 onwards.
//   2. For every remaining parsed operand: modifier operands (isMod()) are
//      not added immediately, only their position is recorded in OptionalIdx
//      keyed by immediate type; all other operands are added as reg-or-imm.
//   3. Append the clamp operand and then the omod operand through
//      addOptionalImmOperand, using the positions recorded in step 2.
3750 void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) {
3751 OptionalImmIndexMap OptionalIdx;
3752
// Index 0 is skipped - presumably the mnemonic token; TODO confirm.
3753 unsigned I = 1;
3754 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3755 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
3756 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
3757 }
3758
// NOTE(review): the `I != E` bound assumes Operands holds at least
// 1 + NumDefs entries; verify the matcher guarantees that.
3759 for (unsigned E = Operands.size(); I != E; ++I) {
3760 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3761 if (Op.isMod()) {
// Defer: remember where this modifier was parsed so it can be emitted in
// the fixed clamp-then-omod order below.
3762 OptionalIdx[Op.getImmTy()] = I;
3763 } else {
3764 Op.addRegOrImmOperands(Inst, 1);
3765 }
3766 }
3767
// Emitted in a fixed order (clamp first, then omod) regardless of the order
// in which they were parsed. Behaviour when a modifier is absent is defined
// by addOptionalImmOperand - presumably a default value; confirm.
3768 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
3769 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
37373770 }
37383771
37393772 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
657657 def VOP3NoMods : ComplexPattern;
658658 // VOP3Mods, but the input source is known to never be NaN.
659659 def VOP3Mods_nnan : ComplexPattern;
660
661 def VOP3OMods : ComplexPattern;
660662
661663 def VOP3PMods : ComplexPattern;
662664 def VOP3PMods0 : ComplexPattern;
8484 }
8585
// Builds the selection pattern list `ret` from profile P and operator `node`.
// NOTE(review): this view contains two definitions of `ret` (a two-way !if
// followed by a three-way !if) - presumably the removed and added sides of
// the diff hunk; the class template arguments also appear to have been
// stripped by the extraction.
//
// In the three-way form:
//   - P.HasModifiers: src0 is matched through VOP3Mods0, binding
//     $src0_modifiers, $clamp and $omod.
//   - else P.HasOMod: src0 is matched through VOP3OMods, binding only
//     $clamp and $omod.
//   - else: plain `node` applied to $src0.
8686 class getVOP1Pat64 : LetDummies {
87 list ret = !if(P.HasModifiers,
88 [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
89 i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
90 [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
87 list ret =
88 !if(P.HasModifiers,
89 [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
90 i32:$src0_modifiers,
91 i1:$clamp, i32:$omod))))],
// No input modifiers, but clamp/omod operands still have to be bound.
92 !if(P.HasOMod,
93 [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
94 i1:$clamp, i32:$omod))))],
// Neither input nor output modifiers: match the bare source.
95 [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
96 )
97 );
9198 }
9299
93100 multiclass VOP1Inst
96103 def _e64 : VOP3_Pseudo .ret>;
97104 def _sdwa : VOP1_SDWA_Pseudo ;
98105 }
106
107 // Special profile for instructions which have clamp
108 // and output modifiers (but have no input modifiers)
// Instantiated below as VOP1_F64_I32, VOP1_F32_I32 and VOP1_F16_I16.
// NOTE(review): VOP1_F16_I16 therefore also gets HasOMod = 1, while the
// accompanying assembler tests note that output modifiers are not supported
// for f16 - confirm this is intended.
109 class VOPProfileI2F :
110 VOPProfile<[dstVt, srcVt, untyped, untyped]> {
111
// The 64-bit operand list and asm string carry only src0, clamp and omod;
// there is no src0_modifiers operand.
112 let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
113 let Asm64 = "$vdst, $src0$clamp$omod";
114
// HasModifiers = 0 together with HasOMod = 1 is the combination that
// getVOP1Pat64 and the AsmMatchConverter selection in this change test for.
115 let HasModifiers = 0;
116 let HasClamp = 1;
117 let HasOMod = 1;
118 }
119
120 def VOP1_F64_I32 : VOPProfileI2F ;
121 def VOP1_F32_I32 : VOPProfileI2F ;
122 def VOP1_F16_I16 : VOPProfileI2F ;
99123
100124 //===----------------------------------------------------------------------===//
101125 // VOP1 Instructions
143167
144168 let SchedRW = [WriteQuarterRate32] in {
145169 defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
146 defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
147 defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
148 defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
170 defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
171 defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
172 defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
149173 defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
150174 defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
151175 defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
152176 defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
153177 defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
154178 defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
155 defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
179 defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
156180 defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
157181 defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
158 defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
159 defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
160 defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
161 defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
182 defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
183 defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
184 defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
185 defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
162186 defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
163 defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
187 defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
164188 } // End SchedRW = [WriteQuarterRate32]
165189
166190 defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
298322
299323 let SubtargetPredicate = isVI in {
300324
301 defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>;
302 defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>;
325 defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
326 defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
303327 defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
304328 defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
305329 defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
106106 let AsmVariantName = AMDGPUAsmVariants.VOP3;
107107 let AsmMatchConverter =
108108 !if(!eq(VOP3Only,1),
109 !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
110 !if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));
109 !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
110 !if(!eq(P.HasModifiers, 1),
111 "cvtVOP3_2_mod",
112 !if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "")
113 )
114 );
111115
112116 VOPProfile Pfl = P;
113117 }
254254 // CHECK: [0x00,0x00,0xc4,0xd1,0x00,0x00,0xe0,0x83]
255255
256256 v_cubeid_f32 v0, s0, s0, abs(0x3e22f983)
257 // CHECK: [0x00,0x04,0xc4,0xd1,0x00,0x00,0xe0,0x03]
257 // CHECK: [0x00,0x04,0xc4,0xd1,0x00,0x00,0xe0,0x03]
258
259
260 //---------------------------------------------------------------------------//
261 // VOP3 Instructions without Input Modifiers but with Output Modifiers
262 //---------------------------------------------------------------------------//
263
264 v_cvt_f64_i32_e64 v[5:6], s1 clamp
265 // CHECK: [0x05,0x80,0x44,0xd1,0x01,0x00,0x00,0x00]
266
267 v_cvt_f64_i32_e64 v[5:6], s1 mul:2
268 // CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x08]
269
270 v_cvt_f64_i32_e64 v[5:6], s1 mul:4
271 // CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x10]
272
273 v_cvt_f64_i32_e64 v[5:6], s1 div:2
274 // CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x18]
275
276
277 v_cvt_f64_u32_e64 v[5:6], s1 clamp
278 // CHECK: [0x05,0x80,0x56,0xd1,0x01,0x00,0x00,0x00]
279
280 v_cvt_f64_u32_e64 v[5:6], s1 mul:2
281 // CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x08]
282
283 v_cvt_f64_u32_e64 v[5:6], s1 mul:4
284 // CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x10]
285
286 v_cvt_f64_u32_e64 v[5:6], s1 div:2
287 // CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x18]
288
289
290 v_cvt_f32_i32_e64 v5, s1 clamp
291 // CHECK: [0x05,0x80,0x45,0xd1,0x01,0x00,0x00,0x00]
292
293 v_cvt_f32_i32_e64 v5, s1 mul:2
294 // CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x08]
295
296 v_cvt_f32_i32_e64 v5, s1 mul:4
297 // CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x10]
298
299 v_cvt_f32_i32_e64 v5, s1 div:2
300 // CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x18]
301
302
303 v_cvt_f32_u32_e64 v5, s1 clamp
304 // CHECK: [0x05,0x80,0x46,0xd1,0x01,0x00,0x00,0x00]
305
306 v_cvt_f32_u32_e64 v5, s1 mul:2
307 // CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x08]
308
309 v_cvt_f32_u32_e64 v5, s1 mul:4
310 // CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x10]
311
312 v_cvt_f32_u32_e64 v5, s1 div:2
313 // CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x18]
314
315
316 v_cvt_off_f32_i4_e64 v5, s1 clamp
317 // CHECK: [0x05,0x80,0x4e,0xd1,0x01,0x00,0x00,0x00]
318
319 v_cvt_off_f32_i4_e64 v5, s1 mul:2
320 // CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x08]
321
322 v_cvt_off_f32_i4_e64 v5, s1 mul:4
323 // CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x10]
324
325 v_cvt_off_f32_i4_e64 v5, s1 div:2
326 // CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x18]
327
328
329 v_cvt_f32_ubyte0_e64 v5, s1 clamp
330 // CHECK: [0x05,0x80,0x51,0xd1,0x01,0x00,0x00,0x00]
331
332 v_cvt_f32_ubyte0_e64 v5, s1 mul:2
333 // CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x08]
334
335 v_cvt_f32_ubyte0_e64 v5, s1 mul:4
336 // CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x10]
337
338 v_cvt_f32_ubyte0_e64 v5, s1 div:2
339 // CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x18]
340
341
342 v_cvt_f32_ubyte1_e64 v5, s1 clamp
343 // CHECK: [0x05,0x80,0x52,0xd1,0x01,0x00,0x00,0x00]
344
345 v_cvt_f32_ubyte1_e64 v5, s1 mul:2
346 // CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x08]
347
348 v_cvt_f32_ubyte1_e64 v5, s1 mul:4
349 // CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x10]
350
351 v_cvt_f32_ubyte1_e64 v5, s1 div:2
352 // CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x18]
353
354
355 v_cvt_f32_ubyte2_e64 v5, s1 clamp
356 // CHECK: [0x05,0x80,0x53,0xd1,0x01,0x00,0x00,0x00]
357
358 v_cvt_f32_ubyte2_e64 v5, s1 mul:2
359 // CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x08]
360
361 v_cvt_f32_ubyte2_e64 v5, s1 mul:4
362 // CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x10]
363
364 v_cvt_f32_ubyte2_e64 v5, s1 div:2
365 // CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x18]
366
367
368 v_cvt_f32_ubyte3_e64 v5, s1 clamp
369 // CHECK: [0x05,0x80,0x54,0xd1,0x01,0x00,0x00,0x00]
370
371 v_cvt_f32_ubyte3_e64 v5, s1 mul:2
372 // CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x08]
373
374 v_cvt_f32_ubyte3_e64 v5, s1 mul:4
375 // CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x10]
376
377 v_cvt_f32_ubyte3_e64 v5, s1 div:2
378 // CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x18]
379
380
381 // NB: output modifiers are not supported for f16
382 v_cvt_f16_i16_e64 v5, s1 clamp
383 // CHECK: [0x05,0x80,0x7a,0xd1,0x01,0x00,0x00,0x00]
384
385 // NB: output modifiers are not supported for f16
386 v_cvt_f16_u16_e64 v5, s1 clamp
387 // CHECK: [0x05,0x80,0x79,0xd1,0x01,0x00,0x00,0x00]