llvm.org GIT mirror llvm / a20244d
[AVX] Fix mask predicates for 256-bit UNPCKLPS/D and implement missing patterns for them. Add a SIMD test subdirectory to hold tests for SIMD instruction selection correctness and quality. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126845 91177308-0d34-0410-b5e6-96231b3b80d8 David Greene 9 years ago
9 changed file(s) with 164 addition(s) and 25 deletion(s). Raw diff Collapse all Expand all
164164 /// datatypes and vector widths.
165165 void DecodeUNPCKLPMask(EVT VT,
166166 SmallVectorImpl &ShuffleMask) {
167 unsigned NumElts = VT.getVectorNumElements();
167168
168 unsigned NElts = VT.getVectorNumElements();
169 // Handle vector lengths > 128 bits. Define a "section" as a set of
170 // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
171 // sections.
172 unsigned NumSections = VT.getSizeInBits() / 128;
173 if (NumSections == 0 ) NumSections = 1; // Handle MMX
174 unsigned NumSectionElts = NumElts / NumSections;
169175
170 for (unsigned i = 0; i != NElts/2; ++i) {
171 ShuffleMask.push_back(i); // Reads from dest
172 ShuffleMask.push_back(i+NElts); // Reads from src
176 unsigned Start = 0;
177 unsigned End = NumSectionElts / 2;
178 for (unsigned s = 0; s < NumSections; ++s) {
179 for (unsigned i = Start; i != End; ++i) {
180 ShuffleMask.push_back(i); // Reads from dest/src1
181 ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
182 }
183 // Process the next 128 bits.
184 Start += NumSectionElts;
185 End += NumSectionElts;
173186 }
174187 }
175188
31723172 bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
31733173 unsigned NumElems = N->getValueType(0).getVectorNumElements();
31743174
3175 if (NumElems != 2 && NumElems != 4)
3175 if ((NumElems != 2 && NumElems != 4)
3176 || N->getValueType(0).getSizeInBits() > 128)
31763177 return false;
31773178
31783179 for (unsigned i = 0; i < NumElems/2; ++i)
31943195 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
31953196 return false;
31963197
3197 for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
3198 int BitI = Mask[i];
3199 int BitI1 = Mask[i+1];
3200 if (!isUndefOrEqual(BitI, j))
3201 return false;
3202 if (V2IsSplat) {
3203 if (!isUndefOrEqual(BitI1, NumElts))
3198 // Handle vector lengths > 128 bits. Define a "section" as a set of
3199 // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
3200 // sections.
3201 unsigned NumSections = VT.getSizeInBits() / 128;
3202 if (NumSections == 0 ) NumSections = 1; // Handle MMX
3203 unsigned NumSectionElts = NumElts / NumSections;
3204
3205 unsigned Start = 0;
3206 unsigned End = NumSectionElts;
3207 for (unsigned s = 0; s < NumSections; ++s) {
3208 for (unsigned i = Start, j = s * NumSectionElts;
3209 i != End;
3210 i += 2, ++j) {
3211 int BitI = Mask[i];
3212 int BitI1 = Mask[i+1];
3213 if (!isUndefOrEqual(BitI, j))
32043214 return false;
3205 } else {
3206 if (!isUndefOrEqual(BitI1, j + NumElts))
3207 return false;
3208 }
3209 }
3215 if (V2IsSplat) {
3216 if (!isUndefOrEqual(BitI1, NumElts))
3217 return false;
3218 } else {
3219 if (!isUndefOrEqual(BitI1, j + NumElts))
3220 return false;
3221 }
3222 }
3223 // Process the next 128 bits.
3224 Start += NumSectionElts;
3225 End += NumSectionElts;
3226 }
3227
32103228 return true;
32113229 }
32123230
32543272 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
32553273 return false;
32563274
3257 for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
3258 int BitI = Mask[i];
3259 int BitI1 = Mask[i+1];
3260 if (!isUndefOrEqual(BitI, j))
3261 return false;
3262 if (!isUndefOrEqual(BitI1, j))
3263 return false;
3264 }
3275 // Handle vector lengths > 128 bits. Define a "section" as a set of
3276 // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
3277 // sections.
3278 unsigned NumSections = VT.getSizeInBits() / 128;
3279 if (NumSections == 0 ) NumSections = 1; // Handle MMX
3280 unsigned NumSectionElts = NumElems / NumSections;
3281
3282 for (unsigned s = 0; s < NumSections; ++s) {
3283 for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
3284 i != NumSectionElts * (s + 1);
3285 i += 2, ++j) {
3286 int BitI = Mask[i];
3287 int BitI1 = Mask[i+1];
3288
3289 if (!isUndefOrEqual(BitI, j))
3290 return false;
3291 if (!isUndefOrEqual(BitI1, j))
3292 return false;
3293 }
3294 }
3295
32653296 return true;
32663297 }
32673298
131131
// SelectionDAG nodes for the UNPCKL/UNPCKH PS/PD shuffles.  Every node here
// uses the SDTShuff2Op type profile.  X86Unpcklpsy / X86Unpcklpdy map to the
// separate X86ISD::VUNPCKLPSY / X86ISD::VUNPCKLPDY opcodes.
132132 def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
133133 def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
134 def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
135 def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
134136 def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
135137 def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
136138
56215621 // Shuffle with UNPCKLPS
// Register/memory forms.  The v4f32 node over VR128 selects VUNPCKLPSrm under
// HasAVX and UNPCKLPSrm otherwise; the v8f32 node over VR256 selects
// VUNPCKLPSYrm and is only given under HasAVX.
56225622 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
56235623 (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
5624 def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
5625 (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
56245626 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
56255627 (UNPCKLPSrm VR128:$src1, addr:$src2)>;
56265628
// Register/register forms, mirroring the memory forms above.
56275629 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
56285630 (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
5631 def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
5632 (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
56295633 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
56305634 (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
56315635
56435647 // Shuffle with UNPCKLPD
// Register/memory forms.  The v2f64 node over VR128 selects VUNPCKLPDrm under
// HasAVX and UNPCKLPDrm otherwise; the v4f64 node over VR256 selects
// VUNPCKLPDYrm and is only given under HasAVX.
56445648 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
56455649 (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
5650 def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
5651 (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
56465652 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
56475653 (UNPCKLPDrm VR128:$src1, addr:$src2)>;
56485654
// Register/register forms, mirroring the memory forms above.
56495655 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
56505656 (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
5657 def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
5658 (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
56515659 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
56525660 (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
56535661
# Test driver for this directory: when llvm_supports_target reports X86,
# hand every .ll/.c/.cpp file found here, in sorted order, to RunLLVMTests.
# With -nocomplain an empty match is not an error.
0 load_lib llvm.exp
1
2 if { [llvm_supports_target X86] } {
3 RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
4 }
0 ; RUN: llc < %s -mattr=+avx | FileCheck %s
1
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
3 target triple = "x86_64-unknown-linux-gnu"
4
; Negative test for <4 x double>.  The mask <0, 4, 1, 5> interleaves elements
; 0 and 1 of the two inputs across the whole vector rather than taking the
; low element of each 128-bit half (compare <0, 4, 2, 6>), so the CHECK-NOT
; line requires that vunpcklpd is not emitted.
5 define void @try_([2 x <4 x double>]* noalias %incarray, [2 x <4 x double>]* noalias %incarrayb ) {
6 entry:
7 %incarray1 = alloca [2 x <4 x double>]*, align 8
8 %incarrayb1 = alloca [2 x <4 x double>]*, align 8
9 %carray = alloca [2 x <4 x double>], align 16
10 %r = getelementptr [2 x <4 x double>]* %incarray, i32 0, i32 0
11 %rb = getelementptr [2 x <4 x double>]* %incarrayb, i32 0, i32 0
12 %r3 = load <4 x double>* %r, align 8
13 %r4 = load <4 x double>* %rb, align 8
14 %r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x double>> [#uses=1]
15 ; CHECK-NOT: vunpcklpd
16 %r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1
17 store <4 x double> %r11, <4 x double>* %r12, align 4
18 ret void
19 }
0 ; RUN: llc < %s -mattr=+avx | FileCheck %s
1
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
3 target triple = "x86_64-unknown-linux-gnu"
4
; Negative test for <8 x float>.  The mask <0, 8, 1, 9, 2, 10, 3, 11>
; interleaves elements 0-3 of the two inputs across the whole vector rather
; than the low two elements of each 128-bit half (compare
; <0, 8, 1, 9, 4, 12, 5, 13>), so the CHECK-NOT line requires that vunpcklps
; is not emitted.
define void @try_([2 x <8 x float>]* noalias %incarray, [2 x <8 x float>]* noalias %incarrayb ) {
entry:
  %incarray1 = alloca [2 x <8 x float>]*, align 8
  %incarrayb1 = alloca [2 x <8 x float>]*, align 8
  %carray = alloca [2 x <8 x float>], align 16
  %r = getelementptr [2 x <8 x float>]* %incarray, i32 0, i32 0
  %rb = getelementptr [2 x <8 x float>]* %incarrayb, i32 0, i32 0
  %r3 = load <8 x float>* %r, align 8
  %r4 = load <8 x float>* %rb, align 8
  %r8 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x float>> [#uses=1]
; CHECK-NOT: vunpcklps
  %r9 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 0
  store <8 x float> %r8, <8 x float>* %r9, align 4
  ret void
}
0 ; RUN: llc < %s -mattr=+avx | FileCheck %s
1
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
3 target triple = "x86_64-unknown-linux-gnu"
4
; Positive test for <4 x double>.  The mask <0, 4, 2, 6> pairs the low element
; of each 128-bit half of the first input with the matching element of the
; second input, and the CHECK line requires vunpcklpd in the output.
5 define void @try_([2 x <4 x double>]* noalias %incarray, [2 x <4 x double>]* noalias %incarrayb ) {
6 entry:
7 %incarray1 = alloca [2 x <4 x double>]*, align 8
8 %incarrayb1 = alloca [2 x <4 x double>]*, align 8
9 %carray = alloca [2 x <4 x double>], align 16
10 %r = getelementptr [2 x <4 x double>]* %incarray, i32 0, i32 0
11 %rb = getelementptr [2 x <4 x double>]* %incarrayb, i32 0, i32 0
12 %r3 = load <4 x double>* %r, align 8
13 %r4 = load <4 x double>* %rb, align 8
14 %r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 2, i32 6 > ; <<4 x double>> [#uses=1]
15 ; CHECK: vunpcklpd
16 %r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1
17 store <4 x double> %r11, <4 x double>* %r12, align 4
18 ret void
19 }
0 ; RUN: llc < %s -mattr=+avx | FileCheck %s
1
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
3 target triple = "x86_64-unknown-linux-gnu"
4
; Positive test for <8 x float>.  The mask <0, 8, 1, 9, 4, 12, 5, 13> pairs the
; low two elements of each 128-bit half of the first input with the matching
; elements of the second input, and the CHECK line requires vunpcklps in the
; output.
5 define void @try_([2 x <8 x float>]* noalias %incarray, [2 x <8 x float>]* noalias %incarrayb ) {
6 entry:
7 %incarray1 = alloca [2 x <8 x float>]*, align 8
8 %incarrayb1 = alloca [2 x <8 x float>]*, align 8
9 %carray = alloca [2 x <8 x float>], align 16
10 %r = getelementptr [2 x <8 x float>]* %incarray, i32 0, i32 0
11 %rb = getelementptr [2 x <8 x float>]* %incarrayb, i32 0, i32 0
12 %r3 = load <8 x float>* %r, align 8
13 %r4 = load <8 x float>* %rb, align 8
14 %r11 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13 > ; <<8 x float>> [#uses=1]
15 ; CHECK: vunpcklps
16 %r12 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 1
17 store <8 x float> %r11, <8 x float>* %r12, align 4
18 ret void
19 }