llvm.org GIT mirror llvm / 26e70a7
[X86] Lower extract_element from k-registers by bitcasting from v16i1 to i16 and extending/truncating.
This is equivalent to what isel was doing anyway, but by canonicalizing earlier we can remove some patterns.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326375 91177308-0d34-0410-b5e6-96231b3b80d8
Craig Topper, 2 years ago
5 changed file(s) with 33 addition(s) and 41 deletion(s). Raw diff Collapse all Expand all
1490914909 return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
1491014910 }
1491114911
14912 // Canonicalize result type to MVT::i32.
14913 if (EltVT != MVT::i32) {
14914 SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
14915 Vec, Idx);
14916 return DAG.getAnyExtOrTrunc(Extract, dl, EltVT);
14917 }
14918
1491914912 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
14920
14921 // Extracts from element 0 are always allowed.
14922 if (IdxVal == 0)
14923 return Op;
1492414913
1492514914 // If the kshift instructions of the correct width aren't natively supported
1492614915 // then we need to promote the vector to the native size to get the correct
1492714916 // zeroing behavior.
14928 if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) ||
14929 (VecVT.getVectorNumElements() < 8)) {
14917 if (VecVT.getVectorNumElements() < 16) {
1493014918 VecVT = MVT::v16i1;
14931 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
14932 DAG.getUNDEF(VecVT),
14933 Vec,
14919 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
14920 DAG.getUNDEF(VecVT), Vec,
1493414921 DAG.getIntPtrConstant(0, dl));
1493514922 }
1493614923
14937 // Use kshiftr instruction to move to the lower element.
14938 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
14939 DAG.getConstant(IdxVal, dl, MVT::i8));
14940 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Vec,
14941 DAG.getIntPtrConstant(0, dl));
14924 // Extracts from element 0 are always allowed.
14925 if (IdxVal != 0) {
14926 // Use kshiftr instruction to move to the lower element.
14927 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
14928 DAG.getConstant(IdxVal, dl, MVT::i8));
14929 }
14930
14931 // Shrink to v16i1 since that's always legal.
14932 if (VecVT.getVectorNumElements() > 16) {
14933 VecVT = MVT::v16i1;
14934 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec,
14935 DAG.getIntPtrConstant(0, dl));
14936 }
14937
14938 // Convert to a bitcast+aext/trunc.
14939 MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements());
14940 return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT);
1494214941 }
1494314942
1494414943 SDValue
28572857 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
28582858 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
28592859 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2860
2861 def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
2862 (COPY_TO_REGCLASS maskRC:$src, GR32)>;
28632860
28642861 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
28652862 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
456456
457457 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
458458 def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
459 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
460 SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
461 SDTCVecEltisVT<1, i1>,
462 SDTCisPtrTy<2>]>>;
463459
464460 def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
465461
268268 ; SKX-LABEL: test14:
269269 ; SKX: ## %bb.0:
270270 ; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
271 ; SKX-NEXT: kshiftrb $4, %k0, %k0
271 ; SKX-NEXT: kshiftrw $4, %k0, %k0
272272 ; SKX-NEXT: kmovd %k0, %eax
273273 ; SKX-NEXT: testb $1, %al
274274 ; SKX-NEXT: cmoveq %rsi, %rdi
1111 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1212 ; KNL-NEXT: kshiftrw $1, %k0, %k1
1313 ; KNL-NEXT: kmovw %k1, %eax
14 ; KNL-NEXT: kshiftrw $2, %k0, %k1
15 ; KNL-NEXT: kshiftrw $1, %k1, %k2
16 ; KNL-NEXT: kmovw %k1, %ecx
1714 ; KNL-NEXT: testb $1, %al
1815 ; KNL-NEXT: fld1
1916 ; KNL-NEXT: fldz
2017 ; KNL-NEXT: fld %st(0)
2118 ; KNL-NEXT: fcmovne %st(2), %st(0)
22 ; KNL-NEXT: testb $1, %cl
19 ; KNL-NEXT: kshiftrw $2, %k0, %k1
20 ; KNL-NEXT: kshiftrw $1, %k1, %k2
21 ; KNL-NEXT: kmovw %k2, %eax
22 ; KNL-NEXT: testb $1, %al
2323 ; KNL-NEXT: fld %st(1)
2424 ; KNL-NEXT: fcmovne %st(3), %st(0)
25 ; KNL-NEXT: kmovw %k2, %eax
25 ; KNL-NEXT: kmovw %k0, %eax
2626 ; KNL-NEXT: testb $1, %al
2727 ; KNL-NEXT: fld %st(2)
2828 ; KNL-NEXT: fcmovne %st(4), %st(0)
29 ; KNL-NEXT: kmovw %k0, %eax
29 ; KNL-NEXT: kmovw %k1, %eax
3030 ; KNL-NEXT: testb $1, %al
3131 ; KNL-NEXT: fxch %st(3)
3232 ; KNL-NEXT: fcmovne %st(4), %st(0)
3333 ; KNL-NEXT: fstp %st(4)
3434 ; KNL-NEXT: fxch %st(3)
35 ; KNL-NEXT: fstpt 20(%rdi)
36 ; KNL-NEXT: fxch %st(1)
3537 ; KNL-NEXT: fstpt (%rdi)
3638 ; KNL-NEXT: fxch %st(1)
3739 ; KNL-NEXT: fstpt 30(%rdi)
38 ; KNL-NEXT: fxch %st(1)
39 ; KNL-NEXT: fstpt 20(%rdi)
4040 ; KNL-NEXT: fstpt 10(%rdi)
4141 ; KNL-NEXT: vzeroupper
4242 ; KNL-NEXT: retq
5353 ; SKX-NEXT: fldz
5454 ; SKX-NEXT: fld %st(0)
5555 ; SKX-NEXT: fcmovne %st(2), %st(0)
56 ; SKX-NEXT: kshiftrw $1, %k0, %k2
57 ; SKX-NEXT: kmovd %k2, %eax
56 ; SKX-NEXT: kmovd %k1, %eax
5857 ; SKX-NEXT: testb $1, %al
5958 ; SKX-NEXT: fld %st(1)
6059 ; SKX-NEXT: fcmovne %st(3), %st(0)
60 ; SKX-NEXT: kshiftrw $1, %k0, %k1
6161 ; SKX-NEXT: kmovd %k1, %eax
6262 ; SKX-NEXT: testb $1, %al
6363 ; SKX-NEXT: fld %st(2)
7070 ; SKX-NEXT: fxch %st(3)
7171 ; SKX-NEXT: fstpt (%rdi)
7272 ; SKX-NEXT: fxch %st(1)
73 ; SKX-NEXT: fstpt 10(%rdi)
74 ; SKX-NEXT: fxch %st(1)
7375 ; SKX-NEXT: fstpt 20(%rdi)
74 ; SKX-NEXT: fxch %st(1)
75 ; SKX-NEXT: fstpt 10(%rdi)
7676 ; SKX-NEXT: fstpt 30(%rdi)
7777 ; SKX-NEXT: retq
7878 bb: