llvm.org GIT mirror llvm / c30df5f
[X86] Allow legacy vcvtps2ph intrinsics to select EVEX-encoded instructions.

Rely on the EVEX->VEX compression pass to convert them back to VEX encoding. Missed store-folding opportunities will be fixed in a subsequent commit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317661 91177308-0d34-0410-b5e6-96231b3b80d8

Craig Topper, 1 year, 11 months ago
3 changed file(s) with 33 addition(s) and 31 deletion(s). Raw diff Collapse all Expand all
76977697 T8PD, VEX, Sched<[WriteCvtF2FLd]>;
76987698 }
76997699
7700 multiclass f16c_ps2ph, Intrinsic Int> {
7700 multiclass f16c_ps2ph> {
77017701 def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
77027702 (ins RC:$src1, i32u8imm:$src2),
77037703 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7704 [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
7704 [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
77057705 TAPD, VEX, Sched<[WriteCvtF2F]>;
77067706 let hasSideEffects = 0, mayStore = 1,
77077707 SchedRW = [WriteCvtF2FLd, WriteRMW] in
77147714 let Predicates = [HasF16C, NoVLX] in {
77157715 defm VCVTPH2PS : f16c_ph2ps;
77167716 defm VCVTPH2PSY : f16c_ph2ps, VEX_L;
7717 defm VCVTPS2PH : f16c_ps2ph;
7718 defm VCVTPS2PHY : f16c_ps2ph, VEX_L;
77177719
77187720 // Pattern match vcvtph2ps of a scalar i64 load.
77197721 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
77237725 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
77247726 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
77257727 (VCVTPH2PSrm addr:$src)>;
7726 }
7727
7728 let Predicates = [HasF16C] in {
7729 defm VCVTPS2PH : f16c_ps2ph;
7730 defm VCVTPS2PHY : f16c_ps2ph, VEX_L;
7731
7732 def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
7733 (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
7734 addr:$dst),
7735 (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7736 def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16
7737 (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
7738 addr:$dst),
7739 (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7740 def : Pat<(store (v8i16 (int_x86_vcvtps2ph_256 VR256:$src1, i32:$src2)),
7741 addr:$dst),
7742 (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
7728
7729 def : Pat<(store (f64 (extractelt
7730 (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
7731 (iPTR 0))), addr:$dst),
7732 (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7733 def : Pat<(store (i64 (extractelt
7734 (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
7735 (iPTR 0))), addr:$dst),
7736 (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7737 def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst),
7738 (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
77437739 }
77447740
77457741 // Patterns for matching conversions from float to half-float and vice versa.
15871587 X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
15881588 X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
15891589 X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
1590 X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
1591 X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
15901592 X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
15911593 X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
15921594 X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
120120 ;
121121 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
122122 ; X32-AVX512VL: # BB#0:
123 ; X32-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
123 ; X32-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
124124 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
125125 ;
126126 ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
127127 ; X64-AVX512VL: # BB#0:
128 ; X64-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
128 ; X64-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
129129 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
130130 %res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
131131 ret <8 x i16> %res
147147 ;
148148 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
149149 ; X32-AVX512VL: # BB#0:
150 ; X32-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
150 ; X32-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
151151 ; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
152152 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
153153 ;
154154 ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
155155 ; X64-AVX512VL: # BB#0:
156 ; X64-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
156 ; X64-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
157157 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
158158 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
159159 %res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
237237 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
238238 ; X32-AVX512VL: # BB#0: # %entry
239239 ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
240 ; X32-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
240 ; X32-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
241241 ; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
242242 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
243243 ;
244244 ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
245245 ; X64-AVX512VL: # BB#0: # %entry
246 ; X64-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%rdi) # encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03]
246 ; X64-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03]
247247 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
248248 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
249249 entry:
267267 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
268268 ; X32-AVX512VL: # BB#0: # %entry
269269 ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
270 ; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
270 ; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
271271 ; X32-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
272272 ; X32-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
273273 ; X32-AVX512VL-NEXT: vpmovdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x00]
275275 ;
276276 ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
277277 ; X64-AVX512VL: # BB#0: # %entry
278 ; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
278 ; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
279279 ; X64-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
280280 ; X64-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
281281 ; X64-AVX512VL-NEXT: vpmovdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07]
302302 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
303303 ; X32-AVX512VL: # BB#0: # %entry
304304 ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
305 ; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
305 ; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
306 ; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
306307 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
307308 ;
308309 ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
309310 ; X64-AVX512VL: # BB#0: # %entry
310 ; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
311 ; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
312 ; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
311313 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
312314 entry:
313315 %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
332334 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
333335 ; X32-AVX512VL: # BB#0: # %entry
334336 ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
335 ; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
337 ; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
338 ; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
336339 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
337340 ;
338341 ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
339342 ; X64-AVX512VL: # BB#0: # %entry
340 ; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
343 ; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
344 ; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
341345 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
342346 entry:
343347 %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)