llvm.org GIT mirror llvm / 999f7b7
Merging r310552: ------------------------------------------------------------------------ r310552 | eladcohen | 2017-08-10 00:44:23 -0700 (Thu, 10 Aug 2017) | 19 lines [SelectionDAG] When scalarizing vselect, don't assert on a legal cond operand. When scalarizing the result of a vselect, the legalizer currently expects to already have scalarized the operands. While this is true for the true/false operands (which have the same type as the result), it is not the case for the condition operand. On X86 AVX512, v1i1 is legal - this causes operations such as '< N x type> vselect < N x i1> < N x type> < N x type>', where < N x type > is illegal, to hit an assertion during scalarization. The handling is similar to r205625. This also exposes the fact that (v1i1 extract_subvector) should be legal and selectable on AVX512 - we do this by custom lowering to extract_vector_elt. This still leaves us in some cases with redundant DAG nodes, which will be combined in a separate, soon-to-come patch. This fixes PR33349. Differential revision: https://reviews.llvm.org/D36511 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@310635 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 3 years ago
3 changed file(s) with 123 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
301301 }
302302
303303 SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
304 SDValue Cond = GetScalarizedVector(N->getOperand(0));
304 SDValue Cond = N->getOperand(0);
305 EVT OpVT = Cond.getValueType();
306 SDLoc DL(N);
307 // The vselect result and true/false operands need scalarizing, but it's
308 // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
309 // See the similar logic in ScalarizeVecRes_VSETCC
310 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
311 Cond = GetScalarizedVector(Cond);
312 } else {
313 EVT VT = OpVT.getVectorElementType();
314 Cond = DAG.getNode(
315 ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
316 DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
317 }
318
305319 SDValue LHS = GetScalarizedVector(N->getOperand(1));
306320 TargetLowering::BooleanContent ScalarBool =
307321 TLI.getBooleanContents(false, false);
13821382 // (result) is 256-bit but the source is 512-bit wide.
13831383 // 128-bit was made Custom under AVX1.
13841384 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1385 MVT::v8f32, MVT::v4f64 })
1385 MVT::v8f32, MVT::v4f64, MVT::v1i1 })
13861386 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
13871387 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
13881388 MVT::v16i1, MVT::v32i1, MVT::v64i1 })
1456814568 SDValue Idx = Op.getOperand(1);
1456914569 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
1457014570 MVT ResVT = Op.getSimpleValueType();
14571
14572 // When v1i1 is legal a scalarization of a vselect with a vXi1 Cond
14573 // would result in: v1i1 = extract_subvector(vXi1, idx).
14574 // Lower these into extract_vector_elt which is already selectable.
14575 if (ResVT == MVT::v1i1) {
14576 assert(Subtarget.hasAVX512() &&
14577 "Boolean EXTRACT_SUBVECTOR requires AVX512");
14578
14579 MVT EltVT = ResVT.getVectorElementType();
14580 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14581 MVT LegalVT =
14582 (TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT();
14583 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx);
14584 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res);
14585 }
1457114586
1457214587 assert((In.getSimpleValueType().is256BitVector() ||
1457314588 In.getSimpleValueType().is512BitVector()) &&
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=KNL
2 ; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=SKX
3
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-linux-gnu"
6
7 define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr {
8 ; KNL-LABEL: test:
9 ; KNL: # BB#0: # %bb
10 ; KNL-NEXT: vpextrb $0, %xmm0, %eax
11 ; KNL-NEXT: testb $1, %al
12 ; KNL-NEXT: fld1
13 ; KNL-NEXT: fldz
14 ; KNL-NEXT: fld %st(0)
15 ; KNL-NEXT: fcmovne %st(2), %st(0)
16 ; KNL-NEXT: vpextrb $4, %xmm0, %eax
17 ; KNL-NEXT: testb $1, %al
18 ; KNL-NEXT: fld %st(1)
19 ; KNL-NEXT: fcmovne %st(3), %st(0)
20 ; KNL-NEXT: vpextrb $8, %xmm0, %eax
21 ; KNL-NEXT: testb $1, %al
22 ; KNL-NEXT: fld %st(2)
23 ; KNL-NEXT: fcmovne %st(4), %st(0)
24 ; KNL-NEXT: vpextrb $12, %xmm0, %eax
25 ; KNL-NEXT: testb $1, %al
26 ; KNL-NEXT: fxch %st(3)
27 ; KNL-NEXT: fcmovne %st(4), %st(0)
28 ; KNL-NEXT: fstp %st(4)
29 ; KNL-NEXT: fxch %st(3)
30 ; KNL-NEXT: fstpt 30(%rdi)
31 ; KNL-NEXT: fxch %st(1)
32 ; KNL-NEXT: fstpt 20(%rdi)
33 ; KNL-NEXT: fxch %st(1)
34 ; KNL-NEXT: fstpt 10(%rdi)
35 ; KNL-NEXT: fstpt (%rdi)
36 ; KNL-NEXT: retq
37 ;
38 ; SKX-LABEL: test:
39 ; SKX: # BB#0: # %bb
40 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
41 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
42 ; SKX-NEXT: kshiftrw $2, %k0, %k1
43 ; SKX-NEXT: kshiftlw $15, %k1, %k2
44 ; SKX-NEXT: kshiftrw $15, %k2, %k2
45 ; SKX-NEXT: kshiftlw $15, %k2, %k2
46 ; SKX-NEXT: kshiftrw $15, %k2, %k2
47 ; SKX-NEXT: kmovd %k2, %eax
48 ; SKX-NEXT: testb $1, %al
49 ; SKX-NEXT: fld1
50 ; SKX-NEXT: fldz
51 ; SKX-NEXT: fld %st(0)
52 ; SKX-NEXT: fcmovne %st(2), %st(0)
53 ; SKX-NEXT: kshiftlw $14, %k1, %k1
54 ; SKX-NEXT: kshiftrw $15, %k1, %k1
55 ; SKX-NEXT: kshiftlw $15, %k1, %k1
56 ; SKX-NEXT: kshiftrw $15, %k1, %k1
57 ; SKX-NEXT: kmovd %k1, %eax
58 ; SKX-NEXT: testb $1, %al
59 ; SKX-NEXT: fld %st(1)
60 ; SKX-NEXT: fcmovne %st(3), %st(0)
61 ; SKX-NEXT: kshiftlw $15, %k0, %k1
62 ; SKX-NEXT: kshiftrw $15, %k1, %k1
63 ; SKX-NEXT: kshiftlw $15, %k1, %k1
64 ; SKX-NEXT: kshiftrw $15, %k1, %k1
65 ; SKX-NEXT: kmovd %k1, %eax
66 ; SKX-NEXT: testb $1, %al
67 ; SKX-NEXT: fld %st(2)
68 ; SKX-NEXT: fcmovne %st(4), %st(0)
69 ; SKX-NEXT: kshiftlw $14, %k0, %k0
70 ; SKX-NEXT: kshiftrw $15, %k0, %k0
71 ; SKX-NEXT: kshiftlw $15, %k0, %k0
72 ; SKX-NEXT: kshiftrw $15, %k0, %k0
73 ; SKX-NEXT: kmovd %k0, %eax
74 ; SKX-NEXT: testb $1, %al
75 ; SKX-NEXT: fxch %st(3)
76 ; SKX-NEXT: fcmovne %st(4), %st(0)
77 ; SKX-NEXT: fstp %st(4)
78 ; SKX-NEXT: fxch %st(3)
79 ; SKX-NEXT: fstpt 10(%rdi)
80 ; SKX-NEXT: fxch %st(1)
81 ; SKX-NEXT: fstpt (%rdi)
82 ; SKX-NEXT: fxch %st(1)
83 ; SKX-NEXT: fstpt 30(%rdi)
84 ; SKX-NEXT: fstpt 20(%rdi)
85 ; SKX-NEXT: retq
86 bb:
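87 %tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer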
88 store <4 x x86_fp80> %tmp, <4 x x86_fp80>* %p, align 16
89 ret void
90 }
91