llvm.org GIT mirror llvm / cde4a1a
Use fp unpack instructions to unpack int types. Until we have AVX2, this is the best we can do for these patterns. This fixes PR10554. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137161 91177308-0d34-0410-b5e6-96231b3b80d8 Bruno Cardoso Lopes 8 years ago
3 changed file(s) with 71 addition(s) and 14 deletion(s). Raw diff Collapse all Expand all
59395939 case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
59405940 case MVT::v4f32: return X86ISD::UNPCKLPS;
59415941 case MVT::v2f64: return X86ISD::UNPCKLPD;
5942 case MVT::v8i32: // Use fp unit for int unpack.
59425943 case MVT::v8f32: return X86ISD::VUNPCKLPSY;
5944 case MVT::v4i64: // Use fp unit for int unpack.
59435945 case MVT::v4f64: return X86ISD::VUNPCKLPDY;
59445946 case MVT::v16i8: return X86ISD::PUNPCKLBW;
59455947 case MVT::v8i16: return X86ISD::PUNPCKLWD;
59555957 case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
59565958 case MVT::v4f32: return X86ISD::UNPCKHPS;
59575959 case MVT::v2f64: return X86ISD::UNPCKHPD;
5960 case MVT::v8i32: // Use fp unit for int unpack.
59585961 case MVT::v8f32: return X86ISD::VUNPCKHPSY;
5962 case MVT::v4i64: // Use fp unit for int unpack.
59595963 case MVT::v4f64: return X86ISD::VUNPCKHPDY;
59605964 case MVT::v16i8: return X86ISD::PUNPCKHBW;
59615965 case MVT::v8i16: return X86ISD::PUNPCKHWD;
57985798 // Shuffle with UNPCKLPS
57995799 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
58005800 (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
5801 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
5802 (UNPCKLPSrm VR128:$src1, addr:$src2)>;
5803
5804 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
5805 (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
5806 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
5807 (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
5808
5809 // Shuffle with VUNPCKLPSY
58015810 def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
58025811 (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
5803 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
5804 (UNPCKLPSrm VR128:$src1, addr:$src2)>;
5805
5806 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
5807 (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
58085812 def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
58095813 (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
5810 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
5811 (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
5814 def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
5815 (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
5816 def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
5817 (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
58125818
58135819 // Shuffle with UNPCKHPS
58145820 def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
58275833 def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
58285834 (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
58295835
5836 def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
5837 (VUNPCKHPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
5838 def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
5839 (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
5840
58305841 // Shuffle with UNPCKLPD
58315842 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
58325843 (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
5844 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
5845 (UNPCKLPDrm VR128:$src1, addr:$src2)>;
5846
5847 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
5848 (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
5849 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
5850 (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
5851
5852 // Shuffle with VUNPCKLPDY
58335853 def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
58345854 (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
5835 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
5836 (UNPCKLPDrm VR128:$src1, addr:$src2)>;
5837
5838 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
5839 (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
58405855 def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
58415856 (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
5842 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
5843 (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
5857
5858 def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
5859 (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
5860 def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
5861 (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
58445862
58455863 // Shuffle with UNPCKHPD
58465864 def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
58575875 def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
58585876 (VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
58595877 def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
5878 (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
5879 def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
5880 (VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
5881 def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
58605882 (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
58615883
58625884 // Shuffle with MOVLHPS
5555 ret <4 x double> %shuffle.i
5656 }
5757
58 ;;;;
59 ;;;; Unpack versions using the fp unit for int unpacking
60 ;;;;
61
62 ; CHECK: vunpckhps
63 define <8 x i32> @unpackhips1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
64 entry:
65 %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
66 ret <8 x i32> %shuffle.i
67 }
68
69 ; CHECK: vunpckhpd
70 define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
71 entry:
72 %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
73 ret <4 x i64> %shuffle.i
74 }
75
76 ; CHECK: vunpcklps
77 define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
78 entry:
79 %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
80 ret <8 x i32> %shuffle.i
81 }
82
83 ; CHECK: vunpcklpd
84 define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
85 entry:
86 %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
87 ret <4 x i64> %shuffle.i
88 }