llvm.org GIT mirror llvm / 3e593b0
[X86] Fix the pattern for merge masked vcvtps2pd.

r362199 fixed it for zero masking, but not merge masking. The load folding in the peephole pass hid the bug. This patch turns off the peephole pass on the relevant test to ensure coverage.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362440 91177308-0d34-0410-b5e6-96231b3b80d8

Craig Topper · 3 months ago
2 changed files with 35 additions and 13 deletions.
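For context, a minimal IR sketch of the two masking flavors involved (function names here are hypothetical; the shape mirrors the f32to4f64_mask_load and f32to4f64_maskz_load tests added below, except that the mask is passed directly instead of coming from an fcmp). In both cases the converted value feeds a select; merge masking keeps a passthru value in the inactive lanes, while zero masking writes 0.0 there. The merge-masked form is the one whose isel pattern this commit fixes.

; Merge masking: inactive lanes take the passthru value.
define <4 x double> @merge_masked_cvtps2pd(<4 x float>* %p, <4 x i1> %mask, <4 x double> %passthru) {
  %v = load <4 x float>, <4 x float>* %p
  %ext = fpext <4 x float> %v to <4 x double>
  %r = select <4 x i1> %mask, <4 x double> %ext, <4 x double> %passthru
  ret <4 x double> %r
}

; Zero masking: inactive lanes become 0.0 (already handled by r362199).
define <4 x double> @zero_masked_cvtps2pd(<4 x float>* %p, <4 x i1> %mask) {
  %v = load <4 x float>, <4 x float>* %p
  %ext = fpext <4 x float> %v to <4 x double>
  %r = select <4 x i1> %mask, <4 x double> %ext, <4 x double> zeroinitializer
  ret <4 x double> %r
}

With the corrected pattern, llc on an AVX512VL target selects the merge-masked case to a single load-folded "vcvtps2pd (%rdi), %ymm2 {%k1}", which is exactly what the VL check lines added in the test below expect.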
7628 7628    (ins MaskRC:$mask, MemOp:$src),
7629 7629    OpcodeStr#Alias, "$src", "$src",
7630 7630    LdDAG,
7631      -  (vselect MaskRC:$mask,
7632      -           (_.VT (OpNode (_Src.VT
7633      -               (_Src.LdFrag addr:$src)))),
7634      -           _.RC:$src0),
     7631 +  (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
7635 7632    vselect, "$src0 = $dst">,
7636 7633    EVEX, Sched<[sched.Folded]>;
7637 7634
 0  0    ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 1     - ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
 2     - ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
 3     - ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
 4     - ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
 5     - ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
 6     - ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
 7     - ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
    1  + ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
    2  + ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
    3  + ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
    4  + ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
    5  + ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
    6  + ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
    7  + ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
 8  8
 9  9
10 10    define <16 x float> @sitof32(<16 x i32> %a) nounwind {
785 785      ret <4 x double> %c
786 786    }
787 787
788      - define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
    788  + define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1, <4 x double> %passthru) {
789 789    ; NOVL-LABEL: f32to4f64_mask_load:
    790  + ; NOVL: # %bb.0:
    791  + ; NOVL-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
    792  + ; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
    793  + ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
    794  + ; NOVL-NEXT: vcvtps2pd (%rdi), %ymm3
    795  + ; NOVL-NEXT: vcmpltpd %zmm1, %zmm0, %k1
    796  + ; NOVL-NEXT: vblendmpd %zmm3, %zmm2, %zmm0 {%k1}
    797  + ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
    798  + ; NOVL-NEXT: retq
    799  + ;
    800  + ; VL-LABEL: f32to4f64_mask_load:
    801  + ; VL: # %bb.0:
    802  + ; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1
    803  + ; VL-NEXT: vcvtps2pd (%rdi), %ymm2 {%k1}
    804  + ; VL-NEXT: vmovaps %ymm2, %ymm0
    805  + ; VL-NEXT: retq
    806  +   %b = load <4 x float>, <4 x float>* %p
    807  +   %a = fpext <4 x float> %b to <4 x double>
    808  +   %mask = fcmp ogt <4 x double> %a1, %b1
    809  +   %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> %passthru
    810  +   ret <4 x double> %c
    811  + }
    812  +
    813  + define <4 x double> @f32to4f64_maskz_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
    814  + ; NOVL-LABEL: f32to4f64_maskz_load:
790 815    ; NOVL: # %bb.0:
791 816    ; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
792 817    ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
796 821    ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
797 822    ; NOVL-NEXT: retq
798 823    ;
799      - ; VL-LABEL: f32to4f64_mask_load:
    824  + ; VL-LABEL: f32to4f64_maskz_load:
800 825    ; VL: # %bb.0:
801 826    ; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1
802 827    ; VL-NEXT: vcvtps2pd (%rdi), %ymm0 {%k1} {z}