llvm.org GIT mirror llvm / eb63de6
[SelectionDAG][X86] Don't use SEXTLOAD for promoting masked loads in the type legalizer Summary: I'm not sure why we were using SEXTLOAD. EXTLOAD seems more appropriate since we don't care about the upper bits. This patch changes this and then modifies the X86 post legalization combine to emit an extending shuffle instead of a sign_extend_vector_inreg. Could maybe use an any_extend_vector_inreg, but I just did what we already do in LowerLoad. I think we can actually get rid of this code entirely if we switch to -x86-experimental-vector-widening-legalization. On AVX512 targets I think we might be able to use a masked vpmovzx and not have to expand this at all. Reviewers: RKSimon, spatel Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D57186 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352255 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 1 year, 4 months ago
3 changed file(s) with 20 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
553553 SDLoc dl(N);
554554 SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
555555 N->getMask(), ExtPassThru, N->getMemoryVT(),
556 N->getMemOperand(), ISD::SEXTLOAD);
556 N->getMemOperand(), ISD::EXTLOAD);
557557 // Legalize the chain result - switch anything that used the old chain to
558558 // use the new one.
559559 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
3759137591 return Blend;
3759237592 }
3759337593
37594 if (Mld->getExtensionType() != ISD::SEXTLOAD)
37594 if (Mld->getExtensionType() != ISD::EXTLOAD)
3759537595 return SDValue();
3759637596
3759737597 // Resolve extending loads.
3766137661 Mld->getBasePtr(), NewMask, WidePassThru,
3766237662 Mld->getMemoryVT(), Mld->getMemOperand(),
3766337663 ISD::NON_EXTLOAD);
37664 SDValue NewVec = getExtendInVec(/*Signed*/true, dl, VT, WideLd, DAG);
37665 return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
37664
37665 SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd);
37666 SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
37667 for (unsigned i = 0; i != NumElems; ++i)
37668 ShuffleVec[i * SizeRatio] = i;
37669
37670 // Can't shuffle using an illegal type.
37671 assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
37672 "WideVecVT should be legal");
37673 SlicedVec = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
37674 DAG.getUNDEF(WideVecVT), ShuffleVec);
37675 SlicedVec = DAG.getBitcast(VT, SlicedVec);
37676
37677 return DCI.CombineTo(N, SlicedVec, WideLd.getValue(1), true);
3766637678 }
3766737679
3766837680 /// If exactly one element of the mask is set for a non-truncating masked store,
13871387 ; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
13881388 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
13891389 ; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
1390 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
1390 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13911391 ; AVX1-NEXT: retq
13921392 ;
13931393 ; AVX2-LABEL: load_v2i32_v2i32:
13991399 ; AVX2-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
14001400 ; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
14011401 ; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
1402 ; AVX2-NEXT: vpmovsxdq %xmm0, %xmm0
1402 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14031403 ; AVX2-NEXT: retq
14041404 ;
14051405 ; AVX512F-LABEL: load_v2i32_v2i32:
14111411 ; AVX512F-NEXT: kshiftlw $14, %k0, %k0
14121412 ; AVX512F-NEXT: kshiftrw $14, %k0, %k1
14131413 ; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1}
1414 ; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
1414 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14151415 ; AVX512F-NEXT: vzeroupper
14161416 ; AVX512F-NEXT: retq
14171417 ;
14221422 ; AVX512VLBW-NEXT: vptestnmq %xmm0, %xmm0, %k1
14231423 ; AVX512VLBW-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
14241424 ; AVX512VLBW-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
1425 ; AVX512VLBW-NEXT: vpmovsxdq %xmm0, %xmm0
1425 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14261426 ; AVX512VLBW-NEXT: retq
14271427 %mask = icmp eq <2 x i32> %trigger, zeroinitializer
14281428 %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)