llvm.org GIT mirror llvm / afc97ae
Merging r226808: ------------------------------------------------------------------------ r226808 | delena | 2015-01-22 04:07:59 -0800 (Thu, 22 Jan 2015) | 10 lines Fixed a bug in type legalizer for masked load/store intrinsics. The problem occurs when after vectorization we have type <2 x i32>. This type is promoted to <2 x i64> and then requires additional efforts for expanding loads and truncating stores. I added EXPAND / TRUNCATE attributes to the masked load/store SDNodes. The code now contains additional shuffles. I've prepared changes in the cost estimation for masked memory operations, it will be submitted separately. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@229561 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 5 years ago
10 changed file(s) with 281 addition(s) and 56 deletion(s). Raw diff Collapse all Expand all
866866 SDValue Offset, ISD::MemIndexedMode AM);
867867
868868 SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
869 SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
869 SDValue Mask, SDValue Src0, EVT MemVT,
870 MachineMemOperand *MMO, ISD::LoadExtType);
870871 SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
871 SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
872 SDValue Ptr, SDValue Mask, EVT MemVT,
873 MachineMemOperand *MMO, bool IsTrunc);
872874 /// getSrcValue - Construct a node to track a Value* through the backend.
873875 SDValue getSrcValue(const Value *v);
874876
19691969 class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
19701970 public:
19711971 friend class SelectionDAG;
1972 MaskedLoadSDNode(unsigned Order, DebugLoc dl,
1973 SDValue *Operands, unsigned numOperands,
1974 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1972 MaskedLoadSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
1973 unsigned numOperands, SDVTList VTs, ISD::LoadExtType ETy,
1974 EVT MemVT, MachineMemOperand *MMO)
19751975 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
1976 VTs, MemVT, MMO)
1977 {}
1978
1976 VTs, MemVT, MMO) {
1977 SubclassData |= (unsigned short)ETy;
1978 }
1979
1980 ISD::LoadExtType getExtensionType() const {
1981 return ISD::LoadExtType(SubclassData & 3);
1982 }
19791983 const SDValue &getSrc0() const { return getOperand(3); }
19801984 static bool classof(const SDNode *N) {
19811985 return N->getOpcode() == ISD::MLOAD;
19881992
19891993 public:
19901994 friend class SelectionDAG;
1991 MaskedStoreSDNode(unsigned Order, DebugLoc dl,
1992 SDValue *Operands, unsigned numOperands,
1993 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1995 MaskedStoreSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
1996 unsigned numOperands, SDVTList VTs, bool isTrunc, EVT MemVT,
1997 MachineMemOperand *MMO)
19941998 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
1995 VTs, MemVT, MMO)
1996 {}
1997
1998 const SDValue &getData() const { return getOperand(3); }
1999 VTs, MemVT, MMO) {
2000 SubclassData |= (unsigned short)isTrunc;
2001 }
2002 /// isTruncatingStore - Return true if the op does a truncation before store.
2003 /// For integers this is the same as doing a TRUNCATE and storing the result.
2004 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2005 bool isTruncatingStore() const { return SubclassData & 1; }
2006
2007 const SDValue &getValue() const { return getOperand(3); }
19992008
20002009 static bool classof(const SDNode *N) {
20012010 return N->getOpcode() == ISD::MSTORE;
48414841
48424842 MaskedStoreSDNode *MST = dyn_cast(N);
48434843 SDValue Mask = MST->getMask();
4844 SDValue Data = MST->getData();
4844 SDValue Data = MST->getValue();
48454845 SDLoc DL(N);
48464846
48474847 // If the MSTORE data type requires splitting and the mask is provided by a
48844884 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
48854885 Alignment, MST->getAAInfo(), MST->getRanges());
48864886
4887 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);
4887 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
4888 MST->isTruncatingStore());
48884889
48894890 unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
48904891 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
48964897 SecondHalfAlignment, MST->getAAInfo(),
48974898 MST->getRanges());
48984899
4899 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);
4900 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
4901 MST->isTruncatingStore());
49004902
49014903 AddToWorklist(Lo.getNode());
49024904 AddToWorklist(Hi.getNode());
49574959 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
49584960 Alignment, MLD->getAAInfo(), MLD->getRanges());
49594961
4960 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);
4962 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
4963 ISD::NON_EXTLOAD);
49614964
49624965 unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
49634966 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
49684971 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
49694972 SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
49704973
4971 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);
4974 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
4975 ISD::NON_EXTLOAD);
49724976
49734977 AddToWorklist(Lo.getNode());
49744978 AddToWorklist(Hi.getNode());
94819485 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
94829486 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
94839487 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
9488 // The narrowing should be profitable, the load/store operation should be
9489 // legal (or custom) and the store size should be equal to the NewVT width.
94849490 while (NewBW < BitWidth &&
94859491 !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
94869492 TLI.isNarrowingProfitable(VT, NewVT))) {
457457 SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
458458 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
459459 SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
460 SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT);
461 SDLoc dl(N);
462
463 MachineMemOperand *MMO = DAG.getMachineFunction().
464 getMachineMemOperand(N->getPointerInfo(),
465 MachineMemOperand::MOLoad, NVT.getStoreSize(),
466 N->getAlignment(), N->getAAInfo(), N->getRanges());
460
461 SDValue Mask = N->getMask();
462 EVT NewMaskVT = getSetCCResultType(NVT);
463 if (NewMaskVT != N->getMask().getValueType())
464 Mask = PromoteTargetBoolean(Mask, NewMaskVT);
465 SDLoc dl(N);
467466
468467 SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
469 ExtMask, ExtSrc0, MMO);
468 Mask, ExtSrc0, N->getMemoryVT(),
469 N->getMemOperand(), ISD::SEXTLOAD);
470470 // Legalized the chain result - switch anything that used the old chain to
471471 // use the new one.
472472 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
11161116 SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
11171117
11181118 assert(OpNo == 2 && "Only know how to promote the mask!");
1119 SDValue DataOp = N->getData();
1119 SDValue DataOp = N->getValue();
11201120 EVT DataVT = DataOp.getValueType();
11211121 SDValue Mask = N->getMask();
11221122 EVT MaskVT = Mask.getValueType();
11231123 SDLoc dl(N);
11241124
1125 bool TruncateStore = false;
11251126 if (!TLI.isTypeLegal(DataVT)) {
11261127 if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
11271128 DataOp = GetPromotedInteger(DataOp);
11281129 Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
1130 TruncateStore = true;
11291131 }
11301132 else {
11311133 assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
11551157 }
11561158 else
11571159 Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
1158 SmallVector NewOps(N->op_begin(), N->op_end());
1159 NewOps[2] = Mask;
1160 NewOps[3] = DataOp;
1161 return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
1160 return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
1161 N->getMemoryVT(), N->getMemOperand(),
1162 TruncateStore);
11621163 }
11631164
11641165 SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
658658 SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
659659 SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
660660 SDValue WidenVecOp_STORE(SDNode* N);
661 SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
661662 SDValue WidenVecOp_SETCC(SDNode* N);
662663
663664 SDValue WidenVecOp_Convert(SDNode *N);
991991 SDValue Ptr = MLD->getBasePtr();
992992 SDValue Mask = MLD->getMask();
993993 unsigned Alignment = MLD->getOriginalAlignment();
994 ISD::LoadExtType ExtType = MLD->getExtensionType();
994995
995996 // if Alignment is equal to the vector size,
996997 // take the half of it for the second part
10141015 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
10151016 Alignment, MLD->getAAInfo(), MLD->getRanges());
10161017
1017 Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO);
1018 Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
1019 ExtType);
10181020
10191021 unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
10201022 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
10251027 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
10261028 SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
10271029
1028 Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO);
1030 Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
1031 ExtType);
10291032
10301033
10311034 // Build a factor node to remember that this load is independent of the
14631466 SDValue Ch = N->getChain();
14641467 SDValue Ptr = N->getBasePtr();
14651468 SDValue Mask = N->getMask();
1466 SDValue Data = N->getData();
1469 SDValue Data = N->getValue();
14671470 EVT MemoryVT = N->getMemoryVT();
14681471 unsigned Alignment = N->getOriginalAlignment();
14691472 SDLoc DL(N);
14881491 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
14891492 Alignment, N->getAAInfo(), N->getRanges());
14901493
1491 Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO);
1494 Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
1495 N->isTruncatingStore());
14921496
14931497 unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
14941498 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14991503 MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
15001504 SecondHalfAlignment, N->getAAInfo(), N->getRanges());
15011505
1502 Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO);
1506 Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
1507 N->isTruncatingStore());
15031508
15041509
15051510 // Build a factor node to remember that this store is independent of the
24112416 SDValue Mask = N->getMask();
24122417 EVT MaskVT = Mask.getValueType();
24132418 SDValue Src0 = GetWidenedVector(N->getSrc0());
2419 ISD::LoadExtType ExtType = N->getExtensionType();
24142420 SDLoc dl(N);
24152421
24162422 if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
24332439 Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
24342440 }
24352441
2436 // Rebuild memory operand because MemoryVT was changed
2437 MachineMemOperand *MMO = DAG.getMachineFunction().
2438 getMachineMemOperand(N->getPointerInfo(),
2439 MachineMemOperand::MOLoad, WidenVT.getStoreSize(),
2440 N->getAlignment(), N->getAAInfo(), N->getRanges());
2441
24422442 SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
2443 Mask, Src0, MMO);
2443 Mask, Src0, N->getMemoryVT(),
2444 N->getMemOperand(), ExtType);
24442445 // Legalized the chain result - switch anything that used the old chain to
24452446 // use the new one.
24462447 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
25922593 case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
25932594 case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
25942595 case ISD::STORE: Res = WidenVecOp_STORE(N); break;
2596 case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
25952597 case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
25962598
25972599 case ISD::ANY_EXTEND:
27882790 return StChain[0];
27892791 else
27902792 return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
2793 }
2794
/// WidenVecOp_MSTORE - Rebuild a masked store (ISD::MSTORE) using the widened
/// form of its stored value.
///
/// \param N    The node to legalize; it is cast to MaskedStoreSDNode.
/// \param OpNo Index of the operand being legalized. When it is 2 the mask
///             is fetched with GetWidenedVector(); otherwise the mask's own
///             type action decides how it is widened.
/// \returns A new masked-store node built by DAG.getMaskedStore() with the
///          widened value and a mask of matching element count.
2795 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
2796 MaskedStoreSDNode *MST = cast(N);
2797 SDValue Mask = MST->getMask();
2798 EVT MaskVT = Mask.getValueType();
2799 SDValue StVal = MST->getValue();
2800 // Widen the value
2801 SDValue WideVal = GetWidenedVector(StVal);
2802 SDLoc dl(N);
2803
// Two ways to obtain a mask as wide as WideVal: take the mask's widened form
// from the legalizer, or pad the original mask by hand.
2804 if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
2805 Mask = GetWidenedVector(Mask);
2806 else {
2807 // The mask should be widened as well
2808 EVT BoolVT = getSetCCResultType(WideVal.getValueType());
2809 // We can't use ModifyToType() because we should fill the mask with
2810 // zeroes
2811 unsigned WidenNumElts = BoolVT.getVectorNumElements();
2812 unsigned MaskNumElts = MaskVT.getVectorNumElements();
2813
// NOTE(review): integer division - this assumes WidenNumElts is a multiple
// of MaskNumElts; confirm that holds for every caller.
2814 unsigned NumConcat = WidenNumElts / MaskNumElts;
2815 SmallVector Ops(NumConcat);
2816 SDValue ZeroVal = DAG.getConstant(0, MaskVT);
// The original mask goes first; every remaining slot is a zero constant of
// the original mask type.
2817 Ops[0] = Mask;
2818 for (unsigned i = 1; i != NumConcat; ++i)
2819 Ops[i] = ZeroVal;
2820
2821 Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
2822 }
2823 assert(Mask.getValueType().getVectorNumElements() ==
2824 WideVal.getValueType().getVectorNumElements() &&
2825 "Mask and data vectors should have the same number of elements");
// The chain, base pointer, memory VT and memory operand are taken unchanged
// from the original node; the trailing 'false' is getMaskedStore's
// truncating-store flag.
2826 return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
2827 Mask, MST->getMemoryVT(), MST->getMemOperand(),
2828 false);
27912829 }
27922830
27932831 SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
49234923
49244924 SDValue
49254925 SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
4926 SDValue Ptr, SDValue Mask, SDValue Src0,
4927 MachineMemOperand *MMO) {
4926 SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
4927 MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {
49284928
49294929 SDVTList VTs = getVTList(VT, MVT::Other);
49304930 SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
49314931 FoldingSetNodeID ID;
49324932 AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
49334933 ID.AddInteger(VT.getRawBits());
4934 ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
4934 ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
49354935 MMO->isVolatile(),
49364936 MMO->isNonTemporal(),
49374937 MMO->isInvariant()));
49434943 }
49444944 SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
49454945 dl.getDebugLoc(), Ops, 4, VTs,
4946 VT, MMO);
4946 ExtTy, MemVT, MMO);
49474947 CSEMap.InsertNode(N, IP);
49484948 InsertNode(N);
49494949 return SDValue(N, 0);
49504950 }
49514951
49524952 SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
4953 SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) {
4953 SDValue Ptr, SDValue Mask, EVT MemVT,
4954 MachineMemOperand *MMO, bool isTrunc) {
49544955 assert(Chain.getValueType() == MVT::Other &&
49554956 "Invalid chain type");
49564957 EVT VT = Val.getValueType();
49694970 }
49704971 SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
49714972 dl.getDebugLoc(), Ops, 4,
4972 VTs, VT, MMO);
4973 VTs, isTrunc, MemVT, MMO);
49734974 CSEMap.InsertNode(N, IP);
49744975 InsertNode(N);
49754976 return SDValue(N, 0);
36663666 getMachineMemOperand(MachinePointerInfo(PtrOperand),
36673667 MachineMemOperand::MOStore, VT.getStoreSize(),
36683668 Alignment, AAInfo);
3669 SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO);
3669 SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3670 MMO, false);
36703671 DAG.setRoot(StoreNode);
36713672 setValue(&I, StoreNode);
36723673 }
37053706 MachineMemOperand::MOLoad, VT.getStoreSize(),
37063707 Alignment, AAInfo, Ranges);
37073708
3708 SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO);
3709 SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
3710 ISD::NON_EXTLOAD);
37093711 SDValue OutChain = Load.getValue(1);
37103712 DAG.setRoot(OutChain);
37113713 setValue(&I, Load);
16781678 setTargetDAGCombine(ISD::FMA);
16791679 setTargetDAGCombine(ISD::SUB);
16801680 setTargetDAGCombine(ISD::LOAD);
1681 setTargetDAGCombine(ISD::MLOAD);
16811682 setTargetDAGCombine(ISD::STORE);
1683 setTargetDAGCombine(ISD::MSTORE);
16821684 setTargetDAGCombine(ISD::ZERO_EXTEND);
16831685 setTargetDAGCombine(ISD::ANY_EXTEND);
16841686 setTargetDAGCombine(ISD::SIGN_EXTEND);
2473724739 return SDValue();
2473824740 }
2473924741
24742 /// PerformMLOADCombine - Resolve extending loads
///
/// Rewrites a sign-extending masked load as a non-extending masked load of a
/// vector with the memory element type, followed by an X86ISD::VSEXT back to
/// the original result type. Masked loads whose extension type is anything
/// other than ISD::SEXTLOAD are left alone (an empty SDValue is returned).
///
/// \param N         The ISD::MLOAD node; it is cast to MaskedLoadSDNode.
/// \param DAG       Used to build the replacement nodes.
/// \param DCI       Its CombineTo() performs the replacement of N.
/// \param Subtarget Not referenced in this function body.
/// \returns The result of DCI.CombineTo(), or an empty SDValue when the load
///          is not a SEXTLOAD.
24743 static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
24744 TargetLowering::DAGCombinerInfo &DCI,
24745 const X86Subtarget *Subtarget) {
24746 MaskedLoadSDNode *Mld = cast(N);
24747 if (Mld->getExtensionType() != ISD::SEXTLOAD)
24748 return SDValue();
24749
// VT is the (extended) result type, LdVT the type actually read from memory.
24750 EVT VT = Mld->getValueType(0);
24751 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24752 unsigned NumElems = VT.getVectorNumElements();
24753 EVT LdVT = Mld->getMemoryVT();
24754 SDLoc dl(Mld);
24755
24756 assert(LdVT != VT && "Cannot extend to the same type");
24757 unsigned ToSz = VT.getVectorElementType().getSizeInBits();
24758 unsigned FromSz = LdVT.getVectorElementType().getSizeInBits();
24759 // From, To sizes and ElemCount must be pow of two
24760 assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
24761 "Unexpected size for extending masked load");
24762
// Number of memory-sized elements that fit in one result element.
24763 unsigned SizeRatio = ToSz / FromSz;
24764 assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
24765
24766 // Create a type on which we perform the shuffle
// WideVecVT has the memory element type and the same total bit width as VT.
24767 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
24768 LdVT.getScalarType(), NumElems*SizeRatio);
24769 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
24770
24771 // Convert Src0 value
// Src0 is reinterpreted as WideVecVT; unless it is UNDEF, element
// i * SizeRatio of the bitcast is moved to position i. The other shuffle
// entries keep their initial -1 (presumably "undefined lane" - confirm
// against getVectorShuffle's contract).
24772 SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0());
24773 if (Mld->getSrc0().getOpcode() != ISD::UNDEF) {
24774 SmallVector ShuffleVec(NumElems * SizeRatio, -1);
24775 for (unsigned i = 0; i != NumElems; ++i)
24776 ShuffleVec[i] = i * SizeRatio;
24777
24778 // Can't shuffle using an illegal type.
24779 assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
24780 WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
24781 DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
24782 }
24783 // Prepare the new mask
24784 SDValue NewMask;
24785 SDValue Mask = Mld->getMask();
24786 if (Mask.getValueType() == VT) {
24787 // Mask and original value have the same type
24788 NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
24789 SmallVector ShuffleVec(NumElems * SizeRatio, -1);
24790 for (unsigned i = 0; i != NumElems; ++i)
24791 ShuffleVec[i] = i * SizeRatio;
// Every position past the first NumElems gets index NumElems*SizeRatio;
// the second shuffle operand below is an all-zero constant, so these
// presumably select a zero lane from it (confirm shuffle index semantics).
24792 for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
24793 ShuffleVec[i] = NumElems*SizeRatio;
24794 NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
24795 DAG.getConstant(0, WideVecVT),
24796 &ShuffleVec[0]);
24797 }
24798 else {
// Otherwise the mask must be a vector of i1: pad it with zero-constant
// vectors of the original mask type up to NumElems*SizeRatio elements.
24799 assert(Mask.getValueType().getVectorElementType() == MVT::i1);
24800 unsigned WidenNumElts = NumElems*SizeRatio;
24801 unsigned MaskNumElts = VT.getVectorNumElements();
24802 EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
24803 WidenNumElts);
24804
24805 unsigned NumConcat = WidenNumElts / MaskNumElts;
24806 SmallVector Ops(NumConcat);
24807 SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
24808 Ops[0] = Mask;
24809 for (unsigned i = 1; i != NumConcat; ++i)
24810 Ops[i] = ZeroVal;
24811
24812 NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
24813 }
24814
// Emit the non-extending load on WideVecVT (reusing the original memory VT
// and memory operand), sign-extend it to VT, and hand both the extended
// value and the new load's value #1 to CombineTo as replacements for N.
24815 SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
24816 Mld->getBasePtr(), NewMask, WideSrc0,
24817 Mld->getMemoryVT(), Mld->getMemOperand(),
24818 ISD::NON_EXTLOAD);
24819 SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
24820 return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
24821
24822 }
24823 /// PerformMSTORECombine - Resolve truncating stores
///
/// Rewrites a truncating masked store as a non-truncating masked store: the
/// stored value is bitcast to a vector of the memory element type, shuffled
/// so that one narrow element per original element is packed at the front,
/// and stored with a correspondingly rebuilt mask. Non-truncating masked
/// stores are left alone (an empty SDValue is returned).
///
/// \param N         The ISD::MSTORE node; it is cast to MaskedStoreSDNode.
/// \param DAG       Used to build the replacement nodes.
/// \param Subtarget Not referenced in this function body.
/// \returns The new masked store, or an empty SDValue when the store is not
///          truncating.
24824 static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
24825 const X86Subtarget *Subtarget) {
24826 MaskedStoreSDNode *Mst = cast(N);
24827 if (!Mst->isTruncatingStore())
24828 return SDValue();
24829
// VT is the type of the value operand, StVT the type written to memory.
24830 EVT VT = Mst->getValue().getValueType();
24831 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24832 unsigned NumElems = VT.getVectorNumElements();
24833 EVT StVT = Mst->getMemoryVT();
24834 SDLoc dl(Mst);
24835
24836 assert(StVT != VT && "Cannot truncate to the same type");
24837 unsigned FromSz = VT.getVectorElementType().getSizeInBits();
24838 unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
24839
24840 // From, To sizes and ElemCount must be pow of two
24841 assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
24842 "Unexpected size for truncating masked store");
24843 // We are going to use the original vector elt for storing.
24844 // Accumulated smaller vector elements must be a multiple of the store size.
24845 assert (((NumElems * FromSz) % ToSz) == 0 &&
24846 "Unexpected ratio for truncating masked store");
24847
// Number of memory-sized elements that fit in one element of the value.
24848 unsigned SizeRatio = FromSz / ToSz;
24849 assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
24850
24851 // Create a type on which we perform the shuffle
// WideVecVT has the memory element type and the same total bit width as VT.
24852 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
24853 StVT.getScalarType(), NumElems*SizeRatio);
24854
24855 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
24856
// Element i * SizeRatio of the bitcast value is moved to position i; the
// remaining entries keep their initial -1 (presumably "undefined lane" -
// confirm against getVectorShuffle's contract).
24857 SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
24858 SmallVector ShuffleVec(NumElems * SizeRatio, -1);
24859 for (unsigned i = 0; i != NumElems; ++i)
24860 ShuffleVec[i] = i * SizeRatio;
24861
24862 // Can't shuffle using an illegal type.
24863 assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
24864
24865 SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
24866 DAG.getUNDEF(WideVecVT),
24867 &ShuffleVec[0]);
24868
// Rebuild the mask so it has as many elements as TruncatedVal.
24869 SDValue NewMask;
24870 SDValue Mask = Mst->getMask();
24871 if (Mask.getValueType() == VT) {
24872 // Mask and original value have the same type
24873 NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
// NOTE(review): ShuffleVec is reused from the value shuffle above, so this
// first loop rewrites entries that already hold the same values.
24874 for (unsigned i = 0; i != NumElems; ++i)
24875 ShuffleVec[i] = i * SizeRatio;
// Every position past the first NumElems gets index NumElems*SizeRatio;
// the second shuffle operand below is an all-zero constant, so these
// presumably select a zero lane from it (confirm shuffle index semantics).
24876 for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
24877 ShuffleVec[i] = NumElems*SizeRatio;
24878 NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
24879 DAG.getConstant(0, WideVecVT),
24880 &ShuffleVec[0]);
24881 }
24882 else {
// Otherwise the mask must be a vector of i1: pad it with zero-constant
// vectors of the original mask type up to NumElems*SizeRatio elements.
24883 assert(Mask.getValueType().getVectorElementType() == MVT::i1);
24884 unsigned WidenNumElts = NumElems*SizeRatio;
24885 unsigned MaskNumElts = VT.getVectorNumElements();
24886 EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
24887 WidenNumElts);
24888
24889 unsigned NumConcat = WidenNumElts / MaskNumElts;
24890 SmallVector Ops(NumConcat);
24891 SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
24892 Ops[0] = Mask;
24893 for (unsigned i = 1; i != NumConcat; ++i)
24894 Ops[i] = ZeroVal;
24895
24896 NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
24897 }
24898
// The chain, base pointer and memory operand come from the original node;
// the trailing 'false' marks the new store as non-truncating.
24899 return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
24900 NewMask, StVT, Mst->getMemOperand(), false);
24901 }
2474024902 /// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
2474124903 static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
2474224904 const X86Subtarget *Subtarget) {
2583525997 case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
2583625998 case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
2583725999 case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
26000 case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget);
2583826001 case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
26002 case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
2583926003 case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
2584026004 case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
2584126005 case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
158158 }
159159
160160 ; AVX2-LABEL: test15
161 ; AVX2: vpmaskmovq
161 ; AVX2: vpmaskmovd
162162 define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
163163 %mask = icmp eq <2 x i32> %trigger, zeroinitializer
164164 call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
175175 }
176176
177177 ; AVX2-LABEL: test17
178 ; AVX2: vpmaskmovq
179 ; AVX2: vblendvpd
178 ; AVX2: vpmaskmovd
179 ; AVX2: vblendvps
180 ; AVX2: vpmovsxdq
180181 define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
181182 %mask = icmp eq <2 x i32> %trigger, zeroinitializer
182183 %res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)