llvm.org GIT mirror llvm / 1f0ddef
[DagCombine] Improve DAGCombiner BUILD_VECTOR when it has two sources of elements

This partially fixes PR21943.

For AVX, we go from:

  vmovq     (%rsi), %xmm0
  vmovq     (%rdi), %xmm1
  vpermilps $-27, %xmm1, %xmm2        ## xmm2 = xmm1[1,1,2,3]
  vinsertps $16, %xmm2, %xmm1, %xmm1  ## xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
  vinsertps $32, %xmm0, %xmm1, %xmm1  ## xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
  vpermilps $-27, %xmm0, %xmm0        ## xmm0 = xmm0[1,1,2,3]
  vinsertps $48, %xmm0, %xmm1, %xmm0  ## xmm0 = xmm1[0,1,2],xmm0[0]

To the expected:

  vmovq   (%rdi), %xmm0
  vmovhpd (%rsi), %xmm0, %xmm0
  retq

Fixing this for AVX2 is still open.

Differential Revision: http://reviews.llvm.org/D6749

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224759 91177308-0d34-0410-b5e6-96231b3b80d8

Michael Kuperstein, 5 years ago
2 changed files with 42 additions and 12 deletions.
@@ -10831,6 +10831,7 @@
 
   // If everything is good, we can make a shuffle operation.
   if (VecIn1.getNode()) {
+    unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
     SmallVector<int, 8> Mask;
     for (unsigned i = 0; i != NumInScalars; ++i) {
       unsigned Opcode = N->getOperand(i).getOpcode();
@@ -10857,8 +10858,8 @@
         continue;
       }
 
-      // Otherwise, use InIdx + VecSize
-      Mask.push_back(NumInScalars+ExtIndex);
+      // Otherwise, use InIdx + InputVecSize
+      Mask.push_back(InNumElements + ExtIndex);
     }
 
     // Avoid introducing illegal shuffles with zero.
@@ -10868,14 +10869,12 @@
     // We can't generate a shuffle node with mismatched input and output types.
     // Attempt to transform a single input vector to the correct type.
     if ((VT != VecIn1.getValueType())) {
-      // We don't support shuffeling between TWO values of different types.
-      if (VecIn2.getNode())
-        return SDValue();
-
       // If the input vector type has a different base type to the output
      // vector type, bail out.
-      if (VecIn1.getValueType().getVectorElementType() !=
-          VT.getVectorElementType())
+      EVT VTElemType = VT.getVectorElementType();
+      if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
+          (VecIn2.getNode() &&
+           (VecIn2.getValueType().getVectorElementType() != VTElemType)))
         return SDValue();
 
       // If the input vector is too small, widen it.
@@ -10883,11 +10882,22 @@
       // output registers. For example XMM->YMM widening on X86 with AVX.
       EVT VecInT = VecIn1.getValueType();
       if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
-        // Widen the input vector by adding undef values.
-        VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
-                             VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+        // If we only have one small input, widen it by adding undef values.
+        if (!VecIn2.getNode())
+          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
+                               DAG.getUNDEF(VecIn1.getValueType()));
+        else if (VecIn1.getValueType() == VecIn2.getValueType()) {
+          // If we have two small inputs of the same type, try to concat them.
+          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
+          VecIn2 = SDValue(nullptr, 0);
+        } else
+          return SDValue();
       } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
         // If the input vector is too large, try to split it.
+        // We don't support having two input vectors that are too large.
+        if (VecIn2.getNode())
+          return SDValue();
+
         if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
           return SDValue();
 
@@ -10898,7 +10908,7 @@
         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
                              DAG.getConstant(0, TLI.getVectorIdxTy()));
         UsesZeroVector = false;
-      } else
+      } else
         return SDValue();
     }
 
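The core of the first hunk is easy to miss in the diff: a mask entry that refers to the second source must be offset by the element count of the first input vector (the new InNumElements), not by the number of BUILD_VECTOR scalars (NumInScalars), because the two differ exactly in the mixed-width cases this patch starts handling. The sketch below is not part of the commit; it is a standalone plain-C++ model of that indexing convention, using hypothetical stand-ins (SourceElt, buildShuffleMask, Elts) rather than SelectionDAG types, and assuming a <4 x float> result built from two <2 x float> inputs as in the assembly above.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for one BUILD_VECTOR operand: which input vector the
// element is extracted from, and its index within that vector.
struct SourceElt {
  int FromVec;   // 0 = first input, 1 = second input, -1 = undef
  int ExtIndex;  // index within the source vector
};

// Build a shuffle mask over the concatenation of the two inputs.  Elements
// taken from the second input are offset by the element count of the *first
// input vector* (InNumElements), not by the number of BUILD_VECTOR scalars;
// the two differ when, e.g., a <4 x float> is built from two <2 x float>s.
std::vector<int> buildShuffleMask(const std::vector<SourceElt> &Elts,
                                  unsigned InNumElements) {
  std::vector<int> Mask;
  for (const SourceElt &E : Elts) {
    if (E.FromVec < 0)
      Mask.push_back(-1);                         // undef lane
    else if (E.FromVec == 0)
      Mask.push_back(E.ExtIndex);                 // first input: index as-is
    else
      Mask.push_back(InNumElements + E.ExtIndex); // second input: offset by
                                                  // the first input's width
  }
  return Mask;
}

int main() {
  // Lanes a[0], a[1], b[0], b[1], as in the movq + movhpd sequence above.
  std::vector<SourceElt> Elts = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
  for (int M : buildShuffleMask(Elts, /*InNumElements=*/2))
    std::printf("%d ", M);   // prints: 0 1 2 3
  std::printf("\n");
  return 0;
}

The other half of the change works on the value side of the same situation: when both inputs are half the width of the result, the combine now concatenates them into one wide vector instead of bailing out, which is what lets the whole BUILD_VECTOR become a single shuffle. The second changed file, shown next, adds the combine_test22 regression test for that pattern.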
@@ -1583,6 +1583,26 @@
   ret <4 x i32> %2
 }
 
+define <8 x float> @combine_test22(<2 x float>* %a, <2 x float>* %b) {
+; SSE-LABEL: combine_test22:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq (%rdi), %xmm0
+; SSE-NEXT:    movhpd (%rsi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_test22:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovq (%rdi), %xmm0
+; AVX1-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; Current AVX2 lowering of this is still awful, not adding a test case.
+  %1 = load <2 x float>* %a, align 8
+  %2 = load <2 x float>* %b, align 8
+  %3 = shufflevector <2 x float> %1, <2 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %3
+}
+
 ; Check some negative cases.
 ; FIXME: Do any of these really make sense? Are they redundant with the above tests?
 