AArch64: Constant fold converting vector setcc results to float.

Since the result of a SETCC for AArch64 is 0 or -1 in each lane, we can
move unary operations, in this case [su]int_to_fp, through the mask
operation and constant fold the operation away. Generally speaking:

    UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> AND(VECTOR_CMP(x,y), constant2)

where constant2 is UNARYOP(constant).

This implements the transform where UNARYOP is [su]int_to_fp.

For example, consider the simple function:

    define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
      %cmp = fcmp oeq <4 x float> %val, %test
      %ext = zext <4 x i1> %cmp to <4 x i32>
      %result = sitofp <4 x i32> %ext to <4 x float>
      ret <4 x float> %result
    }

Before this change, the code is generated as:

    fcmeq.4s  v0, v0, v1
    movi.4s   v1, #0x1          // Integer splat value.
    and.16b   v0, v0, v1        // Mask lanes based on the comparison.
    scvtf.4s  v0, v0            // Convert each lane to f32.
    ret

After, the code is improved to:

    fcmeq.4s  v0, v0, v1
    fmov.4s   v1, #1.00000000   // f32 splat value.
    and.16b   v0, v0, v1        // Mask lanes based on the comparison.
    ret

The scvtf.4s has been constant folded away and the floating point 1.0f
vector lanes are materialized directly via fmov.4s.

Rather than do the folding manually in the target code, teach getNode()
in the generic SelectionDAG to handle folding constant operands of
vector [su]int_to_fp nodes. It is reasonable (as noted in a FIXME) to do
additional constant folding there as well, but I don't have test cases
for those operations, so leaving them for another time when it becomes
appropriate.

rdar://17693791

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213341 91177308-0d34-0410-b5e6-96231b3b80d8

Jim Grosbach, 6 years ago
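Why the fold is legal: each lane of the vector compare is all-zeros or
all-ones, so ANDing a constant into a lane yields either zero or the
constant unchanged. The transform therefore only needs [su]int_to_fp to
map 0 to +0.0, whose bit pattern is all zeros, so that masking after the
conversion equals converting once and masking the result. A standalone
C++ check of that per-lane identity (illustrative only; not part of this
commit):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Per-lane check for the motivating example: for a SETCC lane m in
    // {0, -1} and integer splat 1,
    //   bits(sitofp(m & 1)) == m & bits(1.0f).
    int main() {
      float one = 1.0f;
      uint32_t one_bits;
      std::memcpy(&one_bits, &one, sizeof one_bits); // 0x3F800000

      for (uint32_t mask : {0u, ~0u}) { // vector compare lane results
        // Before the transform: mask the integer splat, then convert.
        float conv = static_cast<float>(static_cast<int32_t>(mask & 1u));
        uint32_t before;
        std::memcpy(&before, &conv, sizeof before);

        // After the transform: mask the pre-converted f32 constant.
        uint32_t after = mask & one_bits;

        assert(before == after);
      }
      return 0;
    }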
3 changed files with 90 additions and 0 deletions.
      else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
        return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
      break;
    }
  }

  // Constant fold unary operations with a vector integer operand.
  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
    APInt Val;
    APInt DummyUndefs;
    unsigned SplatBitSize;
    bool DummyHasUndefs;
    if (BV->isConstantSplat(Val, DummyUndefs, SplatBitSize, DummyHasUndefs)) {
      switch (Opcode) {
      default:
        // FIXME: Entirely reasonable to perform folding of other unary
        // operations here as the need arises.
        break;
      case ISD::UINT_TO_FP:
      case ISD::SINT_TO_FP: {
        APFloat APF(
            EVTToAPFloatSemantics(VT.getVectorElementType()),
            APInt::getNullValue(VT.getVectorElementType().getSizeInBits()));
        (void)APF.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
                                   APFloat::rmNearestTiesToEven);

        return getConstantFP(APF, VT);
      }
      }
    }
  }

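The APFloat conversion above is what actually computes UNARYOP(constant)
at compile time. A minimal standalone sketch of that conversion for the
motivating splat value of 1, written against the APFloat API of this
vintage (newer LLVM spells APFloat::IEEEsingle and APInt::getNullValue
as IEEEsingle() and getZero(); not part of this commit):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"
    #include <cstdio>

    using namespace llvm;

    int main() {
      // Splat value from the motivating example: each lane is (-1 & 1) == 1.
      APInt Val(32, 1);

      // Start from a zero of the destination lane type (f32), then convert
      // the integer splat, as the new getNode() code does.
      APFloat APF(APFloat::IEEEsingle, APInt::getNullValue(32));
      (void)APF.convertFromAPInt(Val, /*isSigned=*/true,
                                 APFloat::rmNearestTiesToEven);

      std::printf("%f\n", APF.convertToFloat()); // prints 1.000000
      return 0;
    }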
  return SDValue();
}

static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
                                                         SelectionDAG &DAG) {
  // Take advantage of vector comparisons producing 0 or -1 in each lane to
  // optimize away the operation when it's from a constant.
  //
  // The general transformation is:
  //    UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
  //    AND(VECTOR_CMP(x,y), constant2)
  //    constant2 = UNARYOP(constant)

  // Early exit if this isn't a vector operation or if the operand of the
  // unary operation isn't a bitwise AND.
  EVT VT = N->getValueType(0);
  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC)
    return SDValue();

  // Now check that the other operand of the AND is a constant splat. We could
  // make the transformation for non-constant splats as well, but it's unclear
  // that would be a benefit as it would not eliminate any operations, just
  // perform one more step in scalar code before moving to the vector unit.
  if (BuildVectorSDNode *BV =
          dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
    // Bail out if the vector isn't a constant splat.
    if (!BV->getConstantSplatNode())
      return SDValue();

    // Everything checks out. Build up the new and improved node.
    SDLoc DL(N);
    EVT IntVT = BV->getValueType(0);
    // Create a new constant of the appropriate type for the transformed
    // DAG.
    SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
    // The AND node needs bitcasts to/from an integer vector type around it.
    SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
    SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
                                 N->getOperand(0)->getOperand(0), MaskConst);
    SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
    return Res;
  }

  return SDValue();
}
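For context, this combine is reached through the target's DAG-combine
hook, whose surrounding code is not part of this diff. A rough sketch of
how AArch64TargetLowering::PerformDAGCombine dispatches [su]int_to_fp
nodes to performIntToFpCombine (details may differ by revision):

    // Sketch only; the actual dispatch lives in AArch64ISelLowering.cpp
    // outside the lines shown in this diff.
    SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                     DAGCombinerInfo &DCI) const {
      SelectionDAG &DAG = DCI.DAG;
      switch (N->getOpcode()) {
      default:
        break;
      case ISD::SINT_TO_FP:
      case ISD::UINT_TO_FP:
        return performIntToFpCombine(N, DAG);
      // ... other combines elided ...
      }
      return SDValue();
    }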

static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
  // First try to optimize away the conversion when it's conditionally from
  // a constant. Vectors only.
  SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
  if (Res != SDValue())
    return Res;

  EVT VT = N->getValueType(0);
  if (VT != MVT::f32 && VT != MVT::f64)
    return SDValue();

  // Only optimize when the source and destination types have the same width.
  if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
    return SDValue();
; RUN: llc < %s -asm-verbose=false -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
; CHECK-LABEL: foo:
; CHECK-NEXT: fcmeq.4s v0, v0, v1
; CHECK-NEXT: fmov.4s v1, #1.00000000
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
  %cmp = fcmp oeq <4 x float> %val, %test
  %ext = zext <4 x i1> %cmp to <4 x i32>
  %result = sitofp <4 x i32> %ext to <4 x float>
  ret <4 x float> %result
}