llvm.org GIT mirror llvm / 9d796db
Add alternative support for FP_EXTEND from v2f32 to v2f64 - Due to the current constraint in ISD::FP_EXTEND that input and output vector elements must match, extending from v2f32 to v2f64 is scalarized. Add a customized v2f32 widening that converts it into a target-specific X86ISD::VFPEXT to work around this constraint. This patch also reverts a previous attempt to fix this issue by recovering the scalarized ISD::FP_EXTEND pattern, and thus significantly reduces the overhead of supporting non-power-of-2 vector FP extend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165625 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Liao 7 years ago
4 changed file(s) with 26 addition(s) and 90 deletion(s). Raw diff Collapse all Expand all
633633 SDValue WidenVecRes_InregOp(SDNode *N);
634634
635635 // Widen Vector Operand.
636 bool WidenVectorOperand(SDNode *N, unsigned ResNo);
636 bool WidenVectorOperand(SDNode *N, unsigned OpNo);
637637 SDValue WidenVecOp_BITCAST(SDNode *N);
638638 SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
639639 SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
20812081 //===----------------------------------------------------------------------===//
20822082 // Widen Vector Operand
20832083 //===----------------------------------------------------------------------===//
2084 bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
2085 DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
2084 bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
2085 DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
20862086 N->dump(&DAG);
20872087 dbgs() << "\n");
20882088 SDValue Res = SDValue();
20892089
2090 // See if the target wants to custom widen this node.
2091 if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
2092 return false;
2093
20902094 switch (N->getOpcode()) {
20912095 default:
20922096 #ifndef NDEBUG
2093 dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
2097 dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
20942098 N->dump(&DAG);
20952099 dbgs() << "\n";
20962100 #endif
938938 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
939939 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
940940
941 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
942
941943 setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
942944 }
943945
51605162 return SDValue();
51615163 }
51625164
5163 // LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
5164 // and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
5165 // constraint of matching input/output vector elements.
//
// Op is inspected operand by operand: every operand must be either UNDEF or an
// FP_EXTEND of an EXTRACT_VECTOR_ELT (constant index) taken from one common
// source vector. On a match the result is X86ISD::VFPEXT applied to a shuffle
// of that source vector; on any mismatch an empty SDValue is returned so the
// caller can fall back to another lowering.
//
// NOTE(review): in this rendering the template arguments of `SmallVector`,
// `isa` and `cast` appear to have been stripped by the page extraction --
// confirm against the original source before reusing this text as code.
5166 SDValue
5167 X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
5168 DebugLoc DL = Op.getDebugLoc();
5169 SDNode *N = Op.getNode();
5170 EVT VT = Op.getValueType();
5171 unsigned NumElts = Op.getNumOperands();
5172
5173 // Check supported types and sub-targets.
5174 //
5175 // Only v2f32 -> v2f64 needs special handling.
5176 if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
5177 return SDValue();
5178
// VecIn / VecInVT: the single source vector all elements are extracted from.
// Mask: shuffle indices into VecIn, one per operand (-1 marks an undef lane).
// SrcVT: type of the scalars being extended; MVT::Other means "not seen yet".
5179 SDValue VecIn;
5180 EVT VecInVT;
5181 SmallVector Mask;
5182 EVT SrcVT = MVT::Other;
5183
5184 // Check the patterns could be translated into X86vfpext.
5185 for (unsigned i = 0; i < NumElts; ++i) {
5186 SDValue In = N->getOperand(i);
5187 unsigned Opcode = In.getOpcode();
5188
5189 // Skip if the element is undefined.
5190 if (Opcode == ISD::UNDEF) {
5191 Mask.push_back(-1);
5192 continue;
5193 }
5194
5195 // Quit if one of the elements is not defined from 'fpext'.
5196 if (Opcode != ISD::FP_EXTEND)
5197 return SDValue();
5198
5199 // Check how the source of 'fpext' is defined.
5200 SDValue L2In = In.getOperand(0);
5201 EVT L2InVT = L2In.getValueType();
5202
5203 // Check the original type
5204 if (SrcVT == MVT::Other)
5205 SrcVT = L2InVT;
5206 else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
5207 return SDValue();
5208
5209 // Check whether the value being 'fpext'ed is extracted from the same
5210 // source.
5211 Opcode = L2In.getOpcode();
5212
5213 // Quit if it's not extracted with a constant index.
5214 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
5215 !isa(L2In.getOperand(1)))
5216 return SDValue();
5217
5218 SDValue ExtractedFromVec = L2In.getOperand(0);
5219
// The first defined element fixes the source vector; later ones must agree.
5220 if (VecIn.getNode() == 0) {
5221 VecIn = ExtractedFromVec;
5222 VecInVT = ExtractedFromVec.getValueType();
5223 } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
5224 return SDValue();
5225
// Record the constant extract index as this lane's shuffle index.
5226 Mask.push_back(cast(L2In.getOperand(1))->getZExtValue());
5227 }
5228
5229 // Quit if all operands of BUILD_VECTOR are undefined.
5230 if (!VecIn.getNode())
5231 return SDValue();
5232
5233 // Fill the remaining mask as undef.
5234 for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
5235 Mask.push_back(-1);
5236
// Shuffle the wanted source lanes into place (second shuffle input is undef),
// then extend the shuffled vector with the target-specific VFPEXT node.
5237 return DAG.getNode(X86ISD::VFPEXT, DL, VT,
5238 DAG.getVectorShuffle(VecInVT, DL,
5239 VecIn, DAG.getUNDEF(VecInVT),
5240 &Mask[0]));
5241 }
5242
52435165 SDValue
52445166 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
52455167 DebugLoc dl = Op.getDebugLoc();
52715193 SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
52725194 if (Broadcast.getNode())
52735195 return Broadcast;
5274
5275 SDValue FpExt = LowerVectorFpExtend(Op, DAG);
5276 if (FpExt.getNode())
5277 return FpExt;
52785196
52795197 unsigned EVTBits = ExtVT.getSizeInBits();
52805198
82128130
82138131 // The node is the result.
82148132 return FIST;
8133 }
8134
// lowerFP_EXTEND - Custom lowering for an ISD::FP_EXTEND node whose input is
// v2f32. The input is concatenated with an undef v2f32 to form a v4f32, and
// that vector is wrapped in a target-specific X86ISD::VFPEXT node producing
// the node's original result type.
8135 SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
8136 SelectionDAG &DAG) const {
8137 DebugLoc DL = Op.getDebugLoc();
8138 EVT VT = Op.getValueType();
8139 SDValue In = Op.getOperand(0);
8140 EVT SVT = In.getValueType();
8141
// Any other source type reaching this function is treated as a programming
// error.
8142 assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
8143
// Widen v2f32 -> v4f32 by appending an undef half, then extend via VFPEXT.
8144 return DAG.getNode(X86ISD::VFPEXT, DL, VT,
8145 DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
8146 In, DAG.getUNDEF(SVT)));
82158147 }
82168148
82178149 SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
1140611338 case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
1140711339 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
1140811340 case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
11341 case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
1140911342 case ISD::FABS: return LowerFABS(Op, DAG);
1141011343 case ISD::FNEG: return LowerFNEG(Op, DAG);
1141111344 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
787787 SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
788788 SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
789789 SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
790 SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
790791 SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
791792 SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
792793 SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
817818
818819 SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
819820
820 SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
821
822821 virtual SDValue
823822 LowerFormalArguments(SDValue Chain,
824823 CallingConv::ID CallConv, bool isVarArg,