llvm.org GIT mirror llvm / 5ea7215
Lower vselects into X86ISD::BLENDI when appropriate. LowerVSELECT will, if possible, generate a X86ISD::BLENDI DAG node if the condition is constant and we can emit that instruction, given the subtarget. This is not enough for all cases. An additional SELECTCombine optimization will be committed. Fixed tests that were expecting variable blends but where a blend+imm can be generated. Added test where we can't emit blend+immediate. Added avx2 blend+imm tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209043 91177308-0d34-0410-b5e6-96231b3b80d8 Filipe Cabecinhas 5 years ago
6 changed file(s) with 128 addition(s) and 19 deletion(s). Raw diff Collapse all Expand all
79707970 return SDValue();
79717971 }
79727972
7973 // This function assumes its argument is a BUILD_VECTOR of constand or
7974 // undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
7975 // true.
7976 static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
7977 unsigned &MaskValue) {
7978 MaskValue = 0;
7979 unsigned NumElems = BuildVector->getNumOperands();
7980 // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
7981 unsigned NumLanes = (NumElems - 1) / 8 + 1;
7982 unsigned NumElemsInLane = NumElems / NumLanes;
7983
7984 // Blend for v16i16 should be symetric for the both lanes.
7985 for (unsigned i = 0; i < NumElemsInLane; ++i) {
7986 SDValue EltCond = BuildVector->getOperand(i);
7987 SDValue SndLaneEltCond =
7988 (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
7989
7990 int Lane1Cond = -1, Lane2Cond = -1;
7991 if (isa(EltCond))
7992 Lane1Cond = !isZero(EltCond);
7993 if (isa(SndLaneEltCond))
7994 Lane2Cond = !isZero(SndLaneEltCond);
7995
7996 if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
7997 MaskValue |= !!Lane1Cond << i;
7998 else if (Lane1Cond < 0)
7999 MaskValue |= !!Lane2Cond << i;
8000 else
8001 return false;
8002 }
8003 return true;
8004 }
8005
8006 // Try to lower a vselect node into a simple blend instruction.
8007 static SDValue LowerVSELECTtoBlend(SDValue Op, const X86Subtarget *Subtarget,
8008 SelectionDAG &DAG) {
8009 SDValue Cond = Op.getOperand(0);
8010 SDValue LHS = Op.getOperand(1);
8011 SDValue RHS = Op.getOperand(2);
8012 SDLoc dl(Op);
8013 MVT VT = Op.getSimpleValueType();
8014 MVT EltVT = VT.getVectorElementType();
8015 unsigned NumElems = VT.getVectorNumElements();
8016
8017 // There is no blend with immediate in AVX-512.
8018 if (VT.is512BitVector())
8019 return SDValue();
8020
8021 if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
8022 return SDValue();
8023 if (!Subtarget->hasInt256() && VT == MVT::v16i16)
8024 return SDValue();
8025
8026 if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
8027 return SDValue();
8028
8029 // Check the mask for BLEND and build the value.
8030 unsigned MaskValue = 0;
8031 if (!BUILD_VECTORtoBlendMask(cast(Cond), MaskValue))
8032 return SDValue();
8033
8034 // Convert i32 vectors to floating point if it is not AVX2.
8035 // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
8036 MVT BlendVT = VT;
8037 if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
8038 BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
8039 NumElems);
8040 LHS = DAG.getNode(ISD::BITCAST, dl, VT, LHS);
8041 RHS = DAG.getNode(ISD::BITCAST, dl, VT, RHS);
8042 }
8043
8044 SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, LHS, RHS,
8045 DAG.getConstant(MaskValue, MVT::i32));
8046 return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
8047 }
8048
79738049 SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
8050 SDValue BlendOp = LowerVSELECTtoBlend(Op, Subtarget, DAG);
8051 if (BlendOp.getNode())
8052 return BlendOp;
8053
79748054 // Some types for vselect were previously set to Expand, not Legal or
79758055 // Custom. Return an empty SDValue so we fall-through to Expand, after
79768056 // the Custom lowering phase.
79838063 return SDValue();
79848064 }
79858065
7986 // This node is Legal.
8066 // We couldn't create a "Blend with immediate" node.
8067 // This node should still be legal, but we'll have to emit a blendv*
8068 // instruction.
79878069 return Op;
79888070 }
79898071
22 ; AVX128 tests:
33
44 ;CHECK-LABEL: vsel_float:
5 ;CHECK: vblendvps
5 ;CHECK: vblendps $5
66 ;CHECK: ret
77 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
88 %vsel = select <4 x i1> , <4 x float> %v1, <4 x float> %v2
1111
1212
1313 ;CHECK-LABEL: vsel_i32:
14 ;CHECK: vblendvps
14 ;CHECK: vblendps $5
1515 ;CHECK: ret
1616 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
1717 %vsel = select <4 x i1> , <4 x i32> %v1, <4 x i32> %v2
5151
5252 ;CHECK-LABEL: vsel_float8:
5353 ;CHECK-NOT: vinsertf128
54 ;CHECK: vblendvps
54 ;CHECK: vblendps $17
5555 ;CHECK: ret
5656 define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
5757 %vsel = select <8 x i1> , <8 x float> %v1, <8 x float> %v2
6060
6161 ;CHECK-LABEL: vsel_i328:
6262 ;CHECK-NOT: vinsertf128
63 ;CHECK: vblendvps
63 ;CHECK: vblendps $17
6464 ;CHECK-NEXT: ret
6565 define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
6666 %vsel = select <8 x i1> , <8 x i32> %v1, <8 x i32> %v2
8585
8686 ;CHECK-LABEL: vsel_double4:
8787 ;CHECK-NOT: vinsertf128
88 ;CHECK: vblendvpd
88 ;CHECK: vblendpd $5
8989 ;CHECK-NEXT: ret
9090 define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
9191 %vsel = select <4 x i1> , <4 x double> %v1, <4 x double> %v2
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
1
2 define <4 x i32> @blendvb_fallback_v4i32(<4 x i1> %mask, <4 x i32> %x, <4 x i32> %y) {
3 ; CHECK-LABEL: @blendvb_fallback_v4i32
4 ; CHECK: vblendvps
5 ; CHECK: ret
6 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %y
7 ret <4 x i32> %ret
8 }
9
10 define <8 x i32> @blendvb_fallback_v8i32(<8 x i1> %mask, <8 x i32> %x, <8 x i32> %y) {
11 ; CHECK-LABEL: @blendvb_fallback_v8i32
12 ; CHECK: vblendvps
13 ; CHECK: ret
14 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
15 ret <8 x i32> %ret
16 }
17
18 define <8 x float> @blendvb_fallback_v8f32(<8 x i1> %mask, <8 x float> %x, <8 x float> %y) {
19 ; CHECK-LABEL: @blendvb_fallback_v8f32
20 ; CHECK: vblendvps
21 ; CHECK: ret
22 %ret = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
23 ret <8 x float> %ret
24 }
33 ; Verify that we produce movss instead of blendvps when possible.
44
55 ;CHECK-LABEL: vsel_float:
6 ;CHECK-NOT: blendvps
6 ;CHECK-NOT: blend
77 ;CHECK: movss
88 ;CHECK: ret
99 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
1212 }
1313
1414 ;CHECK-LABEL: vsel_4xi8:
15 ;CHECK-NOT: blendvps
15 ;CHECK-NOT: blend
1616 ;CHECK: movss
1717 ;CHECK: ret
1818 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
2020 ret <4 x i8> %vsel
2121 }
2222
23
24 ; We do not have native support for v8i16 blends and we have to use the
25 ; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not
26 ; reduce the mask in this case.
2723 ;CHECK-LABEL: vsel_8xi16:
28 ;CHECK: andps
29 ;CHECK: andps
30 ;CHECK: orps
24 ;CHECK: pblendw $17
3125 ;CHECK: ret
3226 define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
3327 %vsel = select <8 x i1> , <8 x i16> %v1, <8 x i16> %v2
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
11
22 ;CHECK-LABEL: vsel_float:
3 ;CHECK: blendvps
3 ;CHECK: blendps
44 ;CHECK: ret
55 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
66 %vsel = select <4 x i1> , <4 x float> %v1, <4 x float> %v2
99
1010
1111 ;CHECK-LABEL: vsel_4xi8:
12 ;CHECK: blendvps
12 ;CHECK: blendps
1313 ;CHECK: ret
1414 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
1515 %vsel = select <4 x i1> , <4 x i8> %v1, <4 x i8> %v2
1717 }
1818
1919 ;CHECK-LABEL: vsel_4xi16:
20 ;CHECK: blendvps
20 ;CHECK: blendps
2121 ;CHECK: ret
2222 define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
2323 %vsel = select <4 x i1> , <4 x i16> %v1, <4 x i16> %v2
2626
2727
2828 ;CHECK-LABEL: vsel_i32:
29 ;CHECK: blendvps
29 ;CHECK: blendps
3030 ;CHECK: ret
3131 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
3232 %vsel = select <4 x i1> , <4 x i32> %v1, <4 x i32> %v2
575575 %res = select <4 x i1> %mask, <4 x float> %x, <4 x float>%vecinit5
576576 ret <4 x float> %res
577577 }
578
579 define <8 x i16> @blendvb_fallback(<8 x i1> %mask, <8 x i16> %x, <8 x i16> %y) {
580 ; CHECK-LABEL: blendvb_fallback
581 ; CHECK: blendvb
582 ; CHECK: ret
583 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
584 ret <8 x i16> %ret
585 }