llvm.org GIT mirror llvm / 1a36c64
[X86][SSE] Refactored shuffle BLEND combining to make future 16i16 support easier. NFCI. Call the matchVectorShuffleAsBlend test as early as possible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298925 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 3 years ago
1 changed file(s) with 34 addition(s) and 35 deletion(s). Raw diff Collapse all Expand all
2713927139 }
2714027140
2714127141 // Attempt to combine to X86ISD::BLENDI.
27142 // TODO - add 16i16 support (requires lane duplication).
2714227143 if (NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
2714327144 (Subtarget.hasAVX() && MaskVT.is256BitVector()))) {
27144 // Determine a type compatible with X86ISD::BLENDI.
27145 // TODO - add 16i16 support (requires lane duplication).
27146 MVT BlendVT = MaskVT;
27147 if (Subtarget.hasAVX2()) {
27148 if (BlendVT == MVT::v4i64)
27149 BlendVT = MVT::v8i32;
27150 else if (BlendVT == MVT::v2i64)
27151 BlendVT = MVT::v4i32;
27152 } else {
27153 if (BlendVT == MVT::v2i64 || BlendVT == MVT::v4i32)
27154 BlendVT = MVT::v8i16;
27155 else if (BlendVT == MVT::v4i64)
27156 BlendVT = MVT::v4f64;
27157 else if (BlendVT == MVT::v8i32)
27158 BlendVT = MVT::v8f32;
27159 }
27160
27161 if (NumMaskElts <= BlendVT.getVectorNumElements()) {
27162 uint64_t BlendMask = 0;
27163 bool ForceV1Zero = false, ForceV2Zero = false;
27164 SmallVector TargetMask(Mask.begin(), Mask.end());
27165 if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero,
27166 ForceV2Zero, BlendMask)) {
27167 if (NumMaskElts < BlendVT.getVectorNumElements()) {
27168 int Scale = BlendVT.getVectorNumElements() / NumMaskElts;
27169 BlendMask =
27170 scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
27171 }
27172
27173 V1 = ForceV1Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V1;
27174 V2 = ForceV2Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V2;
27175 PermuteImm = (unsigned)BlendMask;
27176 Shuffle = X86ISD::BLENDI;
27177 ShuffleVT = BlendVT;
27178 return true;
27145 uint64_t BlendMask = 0;
27146 bool ForceV1Zero = false, ForceV2Zero = false;
27147 SmallVector TargetMask(Mask.begin(), Mask.end());
27148 if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero,
27149 BlendMask)) {
27150 // Determine a type compatible with X86ISD::BLENDI.
27151 ShuffleVT = MaskVT;
27152 if (Subtarget.hasAVX2()) {
27153 if (ShuffleVT == MVT::v4i64)
27154 ShuffleVT = MVT::v8i32;
27155 else if (ShuffleVT == MVT::v2i64)
27156 ShuffleVT = MVT::v4i32;
27157 } else {
27158 if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
27159 ShuffleVT = MVT::v8i16;
27160 else if (ShuffleVT == MVT::v4i64)
27161 ShuffleVT = MVT::v4f64;
27162 else if (ShuffleVT == MVT::v8i32)
27163 ShuffleVT = MVT::v8f32;
2717927164 }
27165
27166 V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
27167 V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
27168
27169 if (!ShuffleVT.isFloatingPoint()) {
27170 int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
27171 BlendMask = scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
27172 ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
27173 ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
27174 }
27175
27176 PermuteImm = (unsigned)BlendMask;
27177 Shuffle = X86ISD::BLENDI;
27178 return true;
2718027179 }
2718127180 }
2718227181