llvm.org GIT mirror llvm / 3b4235d
Merging r366660 and r367306: ------------------------------------------------------------------------ r366660 | rksimon | 2019-07-21 21:04:44 +0200 (Sun, 21 Jul 2019) | 3 lines [X86] SimplifyDemandedVectorEltsForTargetNode - Move SUBV_BROADCAST narrowing handling. NFCI. Move the narrowing of SUBV_BROADCAST to where we handle all the other opcodes. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r367306 | rksimon | 2019-07-30 13:35:13 +0200 (Tue, 30 Jul 2019) | 5 lines [X86][AVX] SimplifyDemandedVectorElts - handle extraction from X86ISD::SUBV_BROADCAST source (PR42819) PR42819 showed an issue that we couldn't handle the case where we demanded a 'sub-sub-vector' of the SUBV_BROADCAST 'sub-vector' source. This patch recognizes these cases and extracts the sub-sub-vector instead of trying to broadcast to a type smaller than the 'sub-vector' source. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_90@367991 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 1 year, 3 months ago
2 changed file(s) with 47 addition(s) and 19 deletion(s). Raw diff Collapse all Expand all
3406134061 return true;
3406234062 break;
3406334063 }
34064 case X86ISD::SUBV_BROADCAST: {
34065 // Reduce size of broadcast if we don't need the upper half.
34066 unsigned HalfElts = NumElts / 2;
34067 if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
34068 SDValue Src = Op.getOperand(0);
34069 MVT SrcVT = Src.getSimpleValueType();
34070
34071 SDValue Half = Src;
34072 if (SrcVT.getVectorNumElements() != HalfElts) {
34073 MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
34074 Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
34075 }
34076
34077 return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
34078 TLO.DAG, SDLoc(Op),
34079 Half.getValueSizeInBits()));
34080 }
34081 break;
34082 }
3408334064 case X86ISD::VPERMV: {
3408434065 SDValue Mask = Op.getOperand(0);
3408534066 APInt MaskUndef, MaskZero;
3413334114 SDValue Insert =
3413434115 insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
3413534116 return TLO.CombineTo(Op, Insert);
34117 }
34118 // Subvector broadcast.
34119 case X86ISD::SUBV_BROADCAST: {
34120 SDLoc DL(Op);
34121 SDValue Src = Op.getOperand(0);
34122 if (Src.getValueSizeInBits() > ExtSizeInBits)
34123 Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
34124 else if (Src.getValueSizeInBits() < ExtSizeInBits) {
34125 MVT SrcSVT = Src.getSimpleValueType().getScalarType();
34126 MVT SrcVT =
34127 MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits());
34128 Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src);
34129 }
34130 return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
34131 TLO.DAG, DL, ExtSizeInBits));
3413634132 }
3413734133 // Byte shifts by immediate.
3413834134 case X86ISD::VSHLDQ:
157157 store <4 x float> %5, <4 x float>* %13, align 16
158158 ret void
159159 }
160
161 define <16 x i32> @PR42819(<8 x i32>* %a0) {
162 ; SSE-LABEL: PR42819:
163 ; SSE: # %bb.0:
164 ; SSE-NEXT: movdqu (%rdi), %xmm3
165 ; SSE-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7,8,9,10,11]
166 ; SSE-NEXT: xorps %xmm0, %xmm0
167 ; SSE-NEXT: xorps %xmm1, %xmm1
168 ; SSE-NEXT: xorps %xmm2, %xmm2
169 ; SSE-NEXT: retq
170 ;
171 ; AVX-LABEL: PR42819:
172 ; AVX: # %bb.0:
173 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,1,2]
174 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
175 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
176 ; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm0[5,6,7]
177 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
178 ; AVX-NEXT: retq
179 ;
180 ; AVX512-LABEL: PR42819:
181 ; AVX512: # %bb.0:
182 ; AVX512-NEXT: vmovdqu (%rdi), %xmm0
183 ; AVX512-NEXT: movw $-8192, %ax # imm = 0xE000
184 ; AVX512-NEXT: kmovw %eax, %k1
185 ; AVX512-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
186 ; AVX512-NEXT: retq
187 %1 = load <8 x i32>, <8 x i32>* %a0, align 4
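188 %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>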
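189 %3 = shufflevector <16 x i32> zeroinitializer, <16 x i32> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 16, i32 17, i32 18>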
190 ret <16 x i32> %3
191 }