llvm.org GIT mirror: llvm / 802a663
[SDAG][AArch64] Legalize VECREDUCE

Fixes https://bugs.llvm.org/show_bug.cgi?id=36796.

Implement basic legalizations (PromoteIntRes, PromoteIntOp, ExpandIntRes, ScalarizeVecOp, WidenVecOp) for VECREDUCE opcodes. There are more legalizations missing (esp. float legalizations), but there is no way to test them right now, so I'm not adding them.

This also includes a few more changes to make this work somewhat reasonably:

* Add support for expanding VECREDUCE in SDAG. Usually experimental.vector.reduce is expanded prior to codegen, but if the target does have native vector reduce, it may of course still be necessary to expand due to legalization issues. This uses a shuffle reduction if possible, followed by a naive scalar reduction.

* Allow the result type of integer VECREDUCE to be larger than the vector element type. For example, we need to be able to reduce a v8i8 into a (nominally) i32 result type on AArch64.

* Use the vector operand type rather than the scalar result type to determine the legalization action, so we can control exactly which vector types are supported. Also change the legalize-vector-op code to handle operations that only have vector operands, but no vector results, as is the case for VECREDUCE.

* Default VECREDUCE to Expand. On AArch64 (the only target using VECREDUCE), explicitly specify for which vector types the reductions are supported.

This does not handle anything related to VECREDUCE_STRICT_*.

Differential Revision: https://reviews.llvm.org/D58015

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355860 91177308-0d34-0410-b5e6-96231b3b80d8

Nikita Popov, 8 months ago
16 changed files with 1068 additions and 10 deletions.
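For orientation before the diff: a minimal IR example (mine, not part of the patch, modeled on the tests added below) of the kind of reduction this commit legalizes. Note that at the IR level the intrinsic's result type matches the element type; the wider, nominally i32 result only appears on the ISD::VECREDUCE_ADD node itself, because AArch64's reduction lowering produces an i32-typed node (the commit message's v8i8 to i32 example) with the value in the top bits unspecified.

    declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>)

    define i8 @sample(<8 x i8> %a) {
      ; Selected to a single ADDV on AArch64; only the low 8 bits of the
      ; reduction node's (nominally i32) result are meaningful.
      %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %a)
      ret i8 %r
    }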
871871 VECREDUCE_STRICT_FADD, VECREDUCE_STRICT_FMUL,
872872 /// These reductions are non-strict, and have a single vector operand.
873873 VECREDUCE_FADD, VECREDUCE_FMUL,
874 /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
875 VECREDUCE_FMAX, VECREDUCE_FMIN,
876 /// Integer reductions may have a result type larger than the vector element
877 /// type. However, the reduction is performed using the vector element type
878 /// and the value in the top bits is unspecified.
874879 VECREDUCE_ADD, VECREDUCE_MUL,
875880 VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR,
876881 VECREDUCE_SMAX, VECREDUCE_SMIN, VECREDUCE_UMAX, VECREDUCE_UMIN,
877 /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
878 VECREDUCE_FMAX, VECREDUCE_FMIN,
879882
880883 /// BUILTIN_OP_END - This must be the last enum value in this list.
881884 /// The target-specific pre-isel opcode values start here.
38923892 bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
38933893 SelectionDAG &DAG) const;
38943894
3895 /// Expand a VECREDUCE_* into an explicit calculation, using a shuffle
3896 /// reduction if possible, followed by a naive scalar reduction.
3897 SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
3898
38953899 //===--------------------------------------------------------------------===//
38963900 // Instruction Emitting Hooks
38973901 //
397397 SDValue visitMSCATTER(SDNode *N);
398398 SDValue visitFP_TO_FP16(SDNode *N);
399399 SDValue visitFP16_TO_FP(SDNode *N);
400 SDValue visitVECREDUCE(SDNode *N);
400401
401402 SDValue visitFADDForFMACombine(SDNode *N);
402403 SDValue visitFSUBForFMACombine(SDNode *N);
15911592 case ISD::MSTORE: return visitMSTORE(N);
15921593 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
15931594 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1595 case ISD::VECREDUCE_FADD:
1596 case ISD::VECREDUCE_FMUL:
1597 case ISD::VECREDUCE_ADD:
1598 case ISD::VECREDUCE_MUL:
1599 case ISD::VECREDUCE_AND:
1600 case ISD::VECREDUCE_OR:
1601 case ISD::VECREDUCE_XOR:
1602 case ISD::VECREDUCE_SMAX:
1603 case ISD::VECREDUCE_SMIN:
1604 case ISD::VECREDUCE_UMAX:
1605 case ISD::VECREDUCE_UMIN:
1606 case ISD::VECREDUCE_FMAX:
1607 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
15941608 }
15951609 return SDValue();
15961610 }
1830618320 return SDValue();
1830718321 }
1830818322
18323 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
18324 SDValue N0 = N->getOperand(0);
18325 EVT VT = N0.getValueType();
18326
18327 // VECREDUCE over 1-element vector is just an extract.
18328 if (VT.getVectorNumElements() == 1) {
18329 SDLoc dl(N);
18330 SDValue Res = DAG.getNode(
18331 ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
18332 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
18333 if (Res.getValueType() != N->getValueType(0))
18334 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
18335 return Res;
18336 }
18337
18338 return SDValue();
18339 }
18340
1830918341 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
1831018342 /// with the destination vector and a zero vector.
1831118343 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
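The new combine above, in IR terms (a sketch; the test_v1i* functions in the tests below exercise this path): a reduction of a one-element vector performs no arithmetic, so it folds to an element extract, any-extended when the node's result type is wider than the element type.

    ; before:  %r = call i8 @llvm.experimental.vector.reduce.add.i8.v1i8(<1 x i8> %a)
    ; after:   %r = extractelement <1 x i8> %a, i64 0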
11381138 case ISD::MSTORE:
11391139 Action = TLI.getOperationAction(Node->getOpcode(),
11401140 cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
1141 break;
1142 case ISD::VECREDUCE_FADD:
1143 case ISD::VECREDUCE_FMUL:
1144 case ISD::VECREDUCE_ADD:
1145 case ISD::VECREDUCE_MUL:
1146 case ISD::VECREDUCE_AND:
1147 case ISD::VECREDUCE_OR:
1148 case ISD::VECREDUCE_XOR:
1149 case ISD::VECREDUCE_SMAX:
1150 case ISD::VECREDUCE_SMIN:
1151 case ISD::VECREDUCE_UMAX:
1152 case ISD::VECREDUCE_UMIN:
1153 case ISD::VECREDUCE_FMAX:
1154 case ISD::VECREDUCE_FMIN:
1155 Action = TLI.getOperationAction(
1156 Node->getOpcode(), Node->getOperand(0).getValueType());
11411157 break;
11421158 default:
11431159 if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
36013617 ReplaceNode(SDValue(Node, 0), Result);
36023618 break;
36033619 }
3620 case ISD::VECREDUCE_FADD:
3621 case ISD::VECREDUCE_FMUL:
3622 case ISD::VECREDUCE_ADD:
3623 case ISD::VECREDUCE_MUL:
3624 case ISD::VECREDUCE_AND:
3625 case ISD::VECREDUCE_OR:
3626 case ISD::VECREDUCE_XOR:
3627 case ISD::VECREDUCE_SMAX:
3628 case ISD::VECREDUCE_SMIN:
3629 case ISD::VECREDUCE_UMAX:
3630 case ISD::VECREDUCE_UMIN:
3631 case ISD::VECREDUCE_FMAX:
3632 case ISD::VECREDUCE_FMIN:
3633 Results.push_back(TLI.expandVecReduce(Node, DAG));
3634 break;
36043635 case ISD::GLOBAL_OFFSET_TABLE:
36053636 case ISD::GlobalAddress:
36063637 case ISD::GlobalTLSAddress:
170170 case ISD::ATOMIC_CMP_SWAP:
171171 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
172172 Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
173 break;
174
175 case ISD::VECREDUCE_ADD:
176 case ISD::VECREDUCE_MUL:
177 case ISD::VECREDUCE_AND:
178 case ISD::VECREDUCE_OR:
179 case ISD::VECREDUCE_XOR:
180 case ISD::VECREDUCE_SMAX:
181 case ISD::VECREDUCE_SMIN:
182 case ISD::VECREDUCE_UMAX:
183 case ISD::VECREDUCE_UMIN:
184 Res = PromoteIntRes_VECREDUCE(N);
173185 break;
174186 }
175187
11061118 case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break;
11071119
11081120 case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
1121
1122 case ISD::VECREDUCE_ADD:
1123 case ISD::VECREDUCE_MUL:
1124 case ISD::VECREDUCE_AND:
1125 case ISD::VECREDUCE_OR:
1126 case ISD::VECREDUCE_XOR:
1127 case ISD::VECREDUCE_SMAX:
1128 case ISD::VECREDUCE_SMIN:
1129 case ISD::VECREDUCE_UMAX:
1130 case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
11091131 }
11101132
11111133 // If the result is null, the sub-method took care of registering results etc.
14801502 SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
14811503 SDValue Op = SExtPromotedInteger(N->getOperand(1));
14821504 return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
1505 }
1506
1507 SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
1508 SDLoc dl(N);
1509 SDValue Op;
1510 switch (N->getOpcode()) {
1511 default: llvm_unreachable("Expected integer vector reduction");
1512 case ISD::VECREDUCE_ADD:
1513 case ISD::VECREDUCE_MUL:
1514 case ISD::VECREDUCE_AND:
1515 case ISD::VECREDUCE_OR:
1516 case ISD::VECREDUCE_XOR:
1517 Op = GetPromotedInteger(N->getOperand(0));
1518 break;
1519 case ISD::VECREDUCE_SMAX:
1520 case ISD::VECREDUCE_SMIN:
1521 Op = SExtPromotedInteger(N->getOperand(0));
1522 break;
1523 case ISD::VECREDUCE_UMAX:
1524 case ISD::VECREDUCE_UMIN:
1525 Op = ZExtPromotedInteger(N->getOperand(0));
1526 break;
1527 }
1528
1529 EVT EltVT = Op.getValueType().getVectorElementType();
1530 EVT VT = N->getValueType(0);
1531 if (VT.bitsGE(EltVT))
1532 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
1533
1534 // Result size must be >= element size. If this is not the case after
1535 // promotion, also promote the result type and then truncate.
1536 SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
1537 return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
14831538 }
14841539
14851540 //===----------------------------------------------------------------------===//
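The promotion choices in PromoteIntOp_VECREDUCE above are required for correctness, not just convenience. A worked example (values mine): promoting an i8 element 0x80 (-128) to i16 for VECREDUCE_SMAX,

    sext: 0x80 -> 0xFF80 (-128)  ; signed order preserved, correct for SMAX/SMIN
    zext: 0x80 -> 0x0080 (+128)  ; would wrongly rank above 0x7F (+127)

Symmetrically, UMAX/UMIN need zext to preserve unsigned order, while ADD/MUL/AND/OR/XOR depend only on the low bits, so the any-extended value from GetPromotedInteger is fine: the high bits of the result are either truncated away or left unspecified by the VECREDUCE contract.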
16231678 case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
16241679 case ISD::SMULFIX:
16251680 case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break;
1681
1682 case ISD::VECREDUCE_ADD:
1683 case ISD::VECREDUCE_MUL:
1684 case ISD::VECREDUCE_AND:
1685 case ISD::VECREDUCE_OR:
1686 case ISD::VECREDUCE_XOR:
1687 case ISD::VECREDUCE_SMAX:
1688 case ISD::VECREDUCE_SMIN:
1689 case ISD::VECREDUCE_UMAX:
1690 case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
16261691 }
16271692
16281693 // If Lo/Hi is null, the sub-method took care of registering results etc.
31713236 ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
31723237 }
31733238
3239 void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
3240 SDValue &Lo, SDValue &Hi) {
3241 // TODO For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate
3242 // both halves independently.
3243 SDValue Res = TLI.expandVecReduce(N, DAG);
3244 SplitInteger(Res, Lo, Hi);
3245 }
3246
31743247 //===----------------------------------------------------------------------===//
31753248 // Integer Operand Expansion
31763249 //===----------------------------------------------------------------------===//
38393912 V0, ConvElem, N->getOperand(2));
38403913 }
38413914
3915 SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
3916 // The VECREDUCE result size may be larger than the element size, so
3917 // we can simply change the result type.
3918 SDLoc dl(N);
3919 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
3920 return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
3921 }
3922
38423923 SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
38433924 SDLoc dl(N);
38443925 SDValue V0 = GetPromotedInteger(N->getOperand(0));
345345 SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
346346 SDValue PromoteIntRes_MULFIX(SDNode *N);
347347 SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
348 SDValue PromoteIntRes_VECREDUCE(SDNode *N);
348349
349350 // Integer Operand Promotion.
350351 bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
379380 SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
380381 SDValue PromoteIntOp_MULFIX(SDNode *N);
381382 SDValue PromoteIntOp_FPOWI(SDNode *N);
383 SDValue PromoteIntOp_VECREDUCE(SDNode *N);
382384
383385 void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
384386
437439 void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
438440
439441 void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
442 void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
440443
441444 void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
442445 SDValue &Lo, SDValue &Hi);
704707 SDValue ScalarizeVecOp_VSETCC(SDNode *N);
705708 SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
706709 SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
710 SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
707711
708712 //===--------------------------------------------------------------------===//
709713 // Vector Splitting Support: LegalizeVectorTypes.cpp
834838
835839 SDValue WidenVecOp_Convert(SDNode *N);
836840 SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
841 SDValue WidenVecOp_VECREDUCE(SDNode *N);
837842
838843 //===--------------------------------------------------------------------===//
839844 // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
293293 }
294294 }
295295
296 bool HasVectorValue = false;
297 for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
298 J != E;
299 ++J)
300 HasVectorValue |= J->isVector();
301 if (!HasVectorValue)
296 bool HasVectorValueOrOp = false;
297 for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J)
298 HasVectorValueOrOp |= J->isVector();
299 for (const SDValue &Op : Node->op_values())
300 HasVectorValueOrOp |= Op.getValueType().isVector();
301
302 if (!HasVectorValueOrOp)
302303 return TranslateLegalizeResults(Op, Result);
303304
304305 TargetLowering::LegalizeAction Action = TargetLowering::Legal;
440441 break;
441442 case ISD::SINT_TO_FP:
442443 case ISD::UINT_TO_FP:
444 case ISD::VECREDUCE_ADD:
445 case ISD::VECREDUCE_MUL:
446 case ISD::VECREDUCE_AND:
447 case ISD::VECREDUCE_OR:
448 case ISD::VECREDUCE_XOR:
449 case ISD::VECREDUCE_SMAX:
450 case ISD::VECREDUCE_SMIN:
451 case ISD::VECREDUCE_UMAX:
452 case ISD::VECREDUCE_UMIN:
453 case ISD::VECREDUCE_FADD:
454 case ISD::VECREDUCE_FMUL:
455 case ISD::VECREDUCE_FMAX:
456 case ISD::VECREDUCE_FMIN:
443457 Action = TLI.getOperationAction(Node->getOpcode(),
444458 Node->getOperand(0).getValueType());
445459 break;
815829 case ISD::STRICT_FROUND:
816830 case ISD::STRICT_FTRUNC:
817831 return ExpandStrictFPOp(Op);
832 case ISD::VECREDUCE_ADD:
833 case ISD::VECREDUCE_MUL:
834 case ISD::VECREDUCE_AND:
835 case ISD::VECREDUCE_OR:
836 case ISD::VECREDUCE_XOR:
837 case ISD::VECREDUCE_SMAX:
838 case ISD::VECREDUCE_SMIN:
839 case ISD::VECREDUCE_UMAX:
840 case ISD::VECREDUCE_UMIN:
841 case ISD::VECREDUCE_FADD:
842 case ISD::VECREDUCE_FMUL:
843 case ISD::VECREDUCE_FMAX:
844 case ISD::VECREDUCE_FMIN:
845 return TLI.expandVecReduce(Op.getNode(), DAG);
818846 default:
819847 return DAG.UnrollVectorOp(Op.getNode());
820848 }
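A note on the Expand path above: the default fallback, DAG.UnrollVectorOp, rebuilds a vector result one element at a time, but VECREDUCE produces a scalar and has only a vector operand, so it cannot be unrolled that way; it is routed to TLI.expandVecReduce instead (implementation further down in this diff).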
605605 case ISD::FP_ROUND:
606606 Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
607607 break;
608 case ISD::VECREDUCE_FADD:
609 case ISD::VECREDUCE_FMUL:
610 case ISD::VECREDUCE_ADD:
611 case ISD::VECREDUCE_MUL:
612 case ISD::VECREDUCE_AND:
613 case ISD::VECREDUCE_OR:
614 case ISD::VECREDUCE_XOR:
615 case ISD::VECREDUCE_SMAX:
616 case ISD::VECREDUCE_SMIN:
617 case ISD::VECREDUCE_UMAX:
618 case ISD::VECREDUCE_UMIN:
619 case ISD::VECREDUCE_FMAX:
620 case ISD::VECREDUCE_FMIN:
621 Res = ScalarizeVecOp_VECREDUCE(N);
622 break;
608623 }
609624 }
610625
733748 N->getValueType(0).getVectorElementType(), Elt,
734749 N->getOperand(1));
735750 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
751 }
752
753 SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
754 SDValue Res = GetScalarizedVector(N->getOperand(0));
755 // Result type may be wider than element type.
756 if (Res.getValueType() != N->getValueType(0))
757 Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res);
758 return Res;
736759 }
737760
738761 //===----------------------------------------------------------------------===//
38673890 case ISD::TRUNCATE:
38683891 Res = WidenVecOp_Convert(N);
38693892 break;
3893
3894 case ISD::VECREDUCE_FADD:
3895 case ISD::VECREDUCE_FMUL:
3896 case ISD::VECREDUCE_ADD:
3897 case ISD::VECREDUCE_MUL:
3898 case ISD::VECREDUCE_AND:
3899 case ISD::VECREDUCE_OR:
3900 case ISD::VECREDUCE_XOR:
3901 case ISD::VECREDUCE_SMAX:
3902 case ISD::VECREDUCE_SMIN:
3903 case ISD::VECREDUCE_UMAX:
3904 case ISD::VECREDUCE_UMIN:
3905 case ISD::VECREDUCE_FMAX:
3906 case ISD::VECREDUCE_FMIN:
3907 Res = WidenVecOp_VECREDUCE(N);
3908 break;
38703909 }
38713910
38723911 // If Res is null, the sub-method took care of registering the result.
42154254 return PromoteTargetBoolean(CC, VT);
42164255 }
42174256
4257 SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
4258 SDLoc dl(N);
4259 SDValue Op = GetWidenedVector(N->getOperand(0));
4260 EVT OrigVT = N->getOperand(0).getValueType();
4261 EVT WideVT = Op.getValueType();
4262 EVT ElemVT = OrigVT.getVectorElementType();
4263
4264 SDValue NeutralElem;
4265 switch (N->getOpcode()) {
4266 case ISD::VECREDUCE_ADD:
4267 case ISD::VECREDUCE_OR:
4268 case ISD::VECREDUCE_XOR:
4269 case ISD::VECREDUCE_UMAX:
4270 NeutralElem = DAG.getConstant(0, dl, ElemVT);
4271 break;
4272 case ISD::VECREDUCE_MUL:
4273 NeutralElem = DAG.getConstant(1, dl, ElemVT);
4274 break;
4275 case ISD::VECREDUCE_AND:
4276 case ISD::VECREDUCE_UMIN:
4277 NeutralElem = DAG.getAllOnesConstant(dl, ElemVT);
4278 break;
4279 case ISD::VECREDUCE_SMAX:
4280 NeutralElem = DAG.getConstant(
4281 APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT);
4282 break;
4283 case ISD::VECREDUCE_SMIN:
4284 NeutralElem = DAG.getConstant(
4285 APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT);
4286 break;
4287 case ISD::VECREDUCE_FADD:
4288 NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT);
4289 break;
4290 case ISD::VECREDUCE_FMUL:
4291 NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
4292 break;
4293 case ISD::VECREDUCE_FMAX:
4294 NeutralElem = DAG.getConstantFP(
4295 std::numeric_limits<double>::infinity(), dl, ElemVT);
4296 break;
4297 case ISD::VECREDUCE_FMIN:
4298 NeutralElem = DAG.getConstantFP(
4299 -std::numeric_limits<double>::infinity(), dl, ElemVT);
4300 break;
4301 }
4302
4303 // Pad the vector with the neutral element.
4304 unsigned OrigElts = OrigVT.getVectorNumElements();
4305 unsigned WideElts = WideVT.getVectorNumElements();
4306 for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
4307 Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
4308 DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
4309
4310 return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
4311 }
4312
42184313
42194314 //===----------------------------------------------------------------------===//
42204315 // Vector Widening Utilities
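The padding loop in WidenVecOp_VECREDUCE shows up directly in the tests below. A sketch for widening a v9i8 ADD reduction to the legal v16i8 (compare test_v9i8 in the add test, which writes wzr into lanes 9-15, and in the and test, which writes -1 instead):

    lanes 0-8:  the original <9 x i8> operand
    lanes 9-15: NeutralElem (0 for ADD, all-ones for AND, ...)
    result:     VECREDUCE_ADD over <16 x i8>, selected to ADDV b0, v0.16b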
56165616 "Unexpected result type for S/UMULO legalization");
56175617 return true;
56185618 }
5619
5620 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
5621 SDLoc dl(Node);
5622 bool NoNaN = Node->getFlags().hasNoNaNs();
5623 unsigned BaseOpcode = 0;
5624 switch (Node->getOpcode()) {
5625 default: llvm_unreachable("Expected VECREDUCE opcode");
5626 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
5627 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
5628 case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break;
5629 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
5630 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
5631 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
5632 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
5633 case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
5634 case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
5635 case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
5636 case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
5637 case ISD::VECREDUCE_FMAX:
5638 BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
5639 break;
5640 case ISD::VECREDUCE_FMIN:
5641 BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
5642 break;
5643 }
5644
5645 SDValue Op = Node->getOperand(0);
5646 EVT VT = Op.getValueType();
5647
5648 // Try to use a shuffle reduction for power of two vectors.
5649 if (VT.isPow2VectorType()) {
5650 while (VT.getVectorNumElements() > 1) {
5651 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
5652 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
5653 break;
5654
5655 SDValue Lo, Hi;
5656 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
5657 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
5658 VT = HalfVT;
5659 }
5660 }
5661
5662 EVT EltVT = VT.getVectorElementType();
5663 unsigned NumElts = VT.getVectorNumElements();
5664
5665 SmallVector<SDValue, 8> Ops;
5666 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
5667
5668 SDValue Res = Ops[0];
5669 for (unsigned i = 1; i < NumElts; i++)
5670 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
5671
5672 // Result type may be wider than element type.
5673 if (EltVT != Node->getValueType(0))
5674 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
5675 return Res;
5676 }
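A short trace of expandVecReduce (a sketch, assuming a target where ADD is legal on v2i32 but not on v1i32), for a VECREDUCE_ADD of v4i32:

    ; shuffle-reduction phase: width is a power of two and the
    ; half-width ADD is legal, so split and combine halves
    (lo, hi) = SplitVector(v4i32 op)    ; two v2i32 halves
    v        = ADD v2i32 lo, hi
    ; v1i32 ADD is not legal, so fall through to the scalar chain
    r = ADD i32 (extract v, 0), (extract v, 1)
    ; finally, any-extend r if the node's result type is wider

The two phases compose: the vector splits halve the element count while a legal half-width operation exists, and whatever width remains is finished with the naive scalar chain.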
664664
665665 // For most targets @llvm.get.dynamic.area.offset just returns 0.
666666 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
667
668 // Vector reductions default to Expand.
669 setOperationAction(ISD::VECREDUCE_FADD, VT, Expand);
670 setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand);
671 setOperationAction(ISD::VECREDUCE_ADD, VT, Expand);
672 setOperationAction(ISD::VECREDUCE_MUL, VT, Expand);
673 setOperationAction(ISD::VECREDUCE_AND, VT, Expand);
674 setOperationAction(ISD::VECREDUCE_OR, VT, Expand);
675 setOperationAction(ISD::VECREDUCE_XOR, VT, Expand);
676 setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand);
677 setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand);
678 setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand);
679 setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
680 setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
681 setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
667682 }
668683
669684 // Most targets ignore the @llvm.prefetch intrinsic.
697697 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
698698
699699 // Vector reductions
700 for (MVT VT : MVT::integer_valuetypes()) {
700 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
701 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
701702 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
702703 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
703704 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
704705 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
705706 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
706707 }
707 for (MVT VT : MVT::fp_valuetypes()) {
708 for (MVT VT : { MVT::v4f16, MVT::v2f32,
709 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
708710 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
709711 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
710712 }
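These explicit type lists replace the previous loops over MVT::integer_valuetypes() and MVT::fp_valuetypes(): since the legalization action is now looked up by the vector operand type rather than the scalar result type, registering Custom for scalar types would no longer match anything. Unlisted vector types (for example v16i32) fall back to the new Expand default set in TargetLoweringBase above.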
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
2
3 declare i1 @llvm.experimental.vector.reduce.add.i1.v1i1(<1 x i1> %a)
4 declare i8 @llvm.experimental.vector.reduce.add.i8.v1i8(<1 x i8> %a)
5 declare i16 @llvm.experimental.vector.reduce.add.i16.v1i16(<1 x i16> %a)
6 declare i24 @llvm.experimental.vector.reduce.add.i24.v1i24(<1 x i24> %a)
7 declare i32 @llvm.experimental.vector.reduce.add.i32.v1i32(<1 x i32> %a)
8 declare i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> %a)
9 declare i128 @llvm.experimental.vector.reduce.add.i128.v1i128(<1 x i128> %a)
10
11 declare i8 @llvm.experimental.vector.reduce.add.i8.v3i8(<3 x i8> %a)
12 declare i8 @llvm.experimental.vector.reduce.add.i8.v9i8(<9 x i8> %a)
13 declare i32 @llvm.experimental.vector.reduce.add.i32.v3i32(<3 x i32> %a)
14 declare i1 @llvm.experimental.vector.reduce.add.i1.v4i1(<4 x i1> %a)
15 declare i24 @llvm.experimental.vector.reduce.add.i24.v4i24(<4 x i24> %a)
16 declare i128 @llvm.experimental.vector.reduce.add.i128.v2i128(<2 x i128> %a)
17 declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %a)
18
19 define i1 @test_v1i1(<1 x i1> %a) nounwind {
20 ; CHECK-LABEL: test_v1i1:
21 ; CHECK: // %bb.0:
22 ; CHECK-NEXT: and w0, w0, #0x1
23 ; CHECK-NEXT: ret
24 %b = call i1 @llvm.experimental.vector.reduce.add.i1.v1i1(<1 x i1> %a)
25 ret i1 %b
26 }
27
28 define i8 @test_v1i8(<1 x i8> %a) nounwind {
29 ; CHECK-LABEL: test_v1i8:
30 ; CHECK: // %bb.0:
31 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
32 ; CHECK-NEXT: umov w0, v0.b[0]
33 ; CHECK-NEXT: ret
34 %b = call i8 @llvm.experimental.vector.reduce.add.i8.v1i8(<1 x i8> %a)
35 ret i8 %b
36 }
37
38 define i16 @test_v1i16(<1 x i16> %a) nounwind {
39 ; CHECK-LABEL: test_v1i16:
40 ; CHECK: // %bb.0:
41 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
42 ; CHECK-NEXT: umov w0, v0.h[0]
43 ; CHECK-NEXT: ret
44 %b = call i16 @llvm.experimental.vector.reduce.add.i16.v1i16(<1 x i16> %a)
45 ret i16 %b
46 }
47
48 define i24 @test_v1i24(<1 x i24> %a) nounwind {
49 ; CHECK-LABEL: test_v1i24:
50 ; CHECK: // %bb.0:
51 ; CHECK-NEXT: ret
52 %b = call i24 @llvm.experimental.vector.reduce.add.i24.v1i24(<1 x i24> %a)
53 ret i24 %b
54 }
55
56 define i32 @test_v1i32(<1 x i32> %a) nounwind {
57 ; CHECK-LABEL: test_v1i32:
58 ; CHECK: // %bb.0:
59 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
60 ; CHECK-NEXT: fmov w0, s0
61 ; CHECK-NEXT: ret
62 %b = call i32 @llvm.experimental.vector.reduce.add.i32.v1i32(<1 x i32> %a)
63 ret i32 %b
64 }
65
66 define i64 @test_v1i64(<1 x i64> %a) nounwind {
67 ; CHECK-LABEL: test_v1i64:
68 ; CHECK: // %bb.0:
69 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
70 ; CHECK-NEXT: fmov x0, d0
71 ; CHECK-NEXT: ret
72 %b = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> %a)
73 ret i64 %b
74 }
75
76 define i128 @test_v1i128(<1 x i128> %a) nounwind {
77 ; CHECK-LABEL: test_v1i128:
78 ; CHECK: // %bb.0:
79 ; CHECK-NEXT: ret
80 %b = call i128 @llvm.experimental.vector.reduce.add.i128.v1i128(<1 x i128> %a)
81 ret i128 %b
82 }
83
84 define i8 @test_v3i8(<3 x i8> %a) nounwind {
85 ; CHECK-LABEL: test_v3i8:
86 ; CHECK: // %bb.0:
87 ; CHECK-NEXT: movi d0, #0000000000000000
88 ; CHECK-NEXT: mov v0.h[0], w0
89 ; CHECK-NEXT: mov v0.h[1], w1
90 ; CHECK-NEXT: mov v0.h[2], w2
91 ; CHECK-NEXT: addv h0, v0.4h
92 ; CHECK-NEXT: fmov w0, s0
93 ; CHECK-NEXT: ret
94 %b = call i8 @llvm.experimental.vector.reduce.add.i8.v3i8(<3 x i8> %a)
95 ret i8 %b
96 }
97
98 define i8 @test_v9i8(<9 x i8> %a) nounwind {
99 ; CHECK-LABEL: test_v9i8:
100 ; CHECK: // %bb.0:
101 ; CHECK-NEXT: mov v0.b[9], wzr
102 ; CHECK-NEXT: mov v0.b[10], wzr
103 ; CHECK-NEXT: mov v0.b[11], wzr
104 ; CHECK-NEXT: mov v0.b[12], wzr
105 ; CHECK-NEXT: mov v0.b[13], wzr
106 ; CHECK-NEXT: mov v0.b[14], wzr
107 ; CHECK-NEXT: mov v0.b[15], wzr
108 ; CHECK-NEXT: addv b0, v0.16b
109 ; CHECK-NEXT: fmov w0, s0
110 ; CHECK-NEXT: ret
111 %b = call i8 @llvm.experimental.vector.reduce.add.i8.v9i8(<9 x i8> %a)
112 ret i8 %b
113 }
114
115 define i32 @test_v3i32(<3 x i32> %a) nounwind {
116 ; CHECK-LABEL: test_v3i32:
117 ; CHECK: // %bb.0:
118 ; CHECK-NEXT: mov v0.s[3], wzr
119 ; CHECK-NEXT: addv s0, v0.4s
120 ; CHECK-NEXT: fmov w0, s0
121 ; CHECK-NEXT: ret
122 %b = call i32 @llvm.experimental.vector.reduce.add.i32.v3i32(<3 x i32> %a)
123 ret i32 %b
124 }
125
126 define i1 @test_v4i1(<4 x i1> %a) nounwind {
127 ; CHECK-LABEL: test_v4i1:
128 ; CHECK: // %bb.0:
129 ; CHECK-NEXT: addv h0, v0.4h
130 ; CHECK-NEXT: fmov w8, s0
131 ; CHECK-NEXT: and w0, w8, #0x1
132 ; CHECK-NEXT: ret
133 %b = call i1 @llvm.experimental.vector.reduce.add.i1.v4i1(<4 x i1> %a)
134 ret i1 %b
135 }
136
137 define i24 @test_v4i24(<4 x i24> %a) nounwind {
138 ; CHECK-LABEL: test_v4i24:
139 ; CHECK: // %bb.0:
140 ; CHECK-NEXT: addv s0, v0.4s
141 ; CHECK-NEXT: fmov w0, s0
142 ; CHECK-NEXT: ret
143 %b = call i24 @llvm.experimental.vector.reduce.add.i24.v4i24(<4 x i24> %a)
144 ret i24 %b
145 }
146
147 define i128 @test_v2i128(<2 x i128> %a) nounwind {
148 ; CHECK-LABEL: test_v2i128:
149 ; CHECK: // %bb.0:
150 ; CHECK-NEXT: adds x0, x0, x2
151 ; CHECK-NEXT: adcs x1, x1, x3
152 ; CHECK-NEXT: ret
153 %b = call i128 @llvm.experimental.vector.reduce.add.i128.v2i128(<2 x i128> %a)
154 ret i128 %b
155 }
156
157 define i32 @test_v16i32(<16 x i32> %a) nounwind {
158 ; CHECK-LABEL: test_v16i32:
159 ; CHECK: // %bb.0:
160 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
161 ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
162 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
163 ; CHECK-NEXT: addv s0, v0.4s
164 ; CHECK-NEXT: fmov w0, s0
165 ; CHECK-NEXT: ret
166 %b = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %a)
167 ret i32 %b
168 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
2
3 declare i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> %a)
4 declare i8 @llvm.experimental.vector.reduce.and.i8.v1i8(<1 x i8> %a)
5 declare i16 @llvm.experimental.vector.reduce.and.i16.v1i16(<1 x i16> %a)
6 declare i24 @llvm.experimental.vector.reduce.and.i24.v1i24(<1 x i24> %a)
7 declare i32 @llvm.experimental.vector.reduce.and.i32.v1i32(<1 x i32> %a)
8 declare i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> %a)
9 declare i128 @llvm.experimental.vector.reduce.and.i128.v1i128(<1 x i128> %a)
10
11 declare i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8> %a)
12 declare i8 @llvm.experimental.vector.reduce.and.i8.v9i8(<9 x i8> %a)
13 declare i32 @llvm.experimental.vector.reduce.and.i32.v3i32(<3 x i32> %a)
14 declare i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> %a)
15 declare i24 @llvm.experimental.vector.reduce.and.i24.v4i24(<4 x i24> %a)
16 declare i128 @llvm.experimental.vector.reduce.and.i128.v2i128(<2 x i128> %a)
17 declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> %a)
18
19 define i1 @test_v1i1(<1 x i1> %a) nounwind {
20 ; CHECK-LABEL: test_v1i1:
21 ; CHECK: // %bb.0:
22 ; CHECK-NEXT: and w0, w0, #0x1
23 ; CHECK-NEXT: ret
24 %b = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> %a)
25 ret i1 %b
26 }
27
28 define i8 @test_v1i8(<1 x i8> %a) nounwind {
29 ; CHECK-LABEL: test_v1i8:
30 ; CHECK: // %bb.0:
31 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
32 ; CHECK-NEXT: umov w0, v0.b[0]
33 ; CHECK-NEXT: ret
34 %b = call i8 @llvm.experimental.vector.reduce.and.i8.v1i8(<1 x i8> %a)
35 ret i8 %b
36 }
37
38 define i16 @test_v1i16(<1 x i16> %a) nounwind {
39 ; CHECK-LABEL: test_v1i16:
40 ; CHECK: // %bb.0:
41 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
42 ; CHECK-NEXT: umov w0, v0.h[0]
43 ; CHECK-NEXT: ret
44 %b = call i16 @llvm.experimental.vector.reduce.and.i16.v1i16(<1 x i16> %a)
45 ret i16 %b
46 }
47
48 define i24 @test_v1i24(<1 x i24> %a) nounwind {
49 ; CHECK-LABEL: test_v1i24:
50 ; CHECK: // %bb.0:
51 ; CHECK-NEXT: ret
52 %b = call i24 @llvm.experimental.vector.reduce.and.i24.v1i24(<1 x i24> %a)
53 ret i24 %b
54 }
55
56 define i32 @test_v1i32(<1 x i32> %a) nounwind {
57 ; CHECK-LABEL: test_v1i32:
58 ; CHECK: // %bb.0:
59 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
60 ; CHECK-NEXT: fmov w0, s0
61 ; CHECK-NEXT: ret
62 %b = call i32 @llvm.experimental.vector.reduce.and.i32.v1i32(<1 x i32> %a)
63 ret i32 %b
64 }
65
66 define i64 @test_v1i64(<1 x i64> %a) nounwind {
67 ; CHECK-LABEL: test_v1i64:
68 ; CHECK: // %bb.0:
69 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
70 ; CHECK-NEXT: fmov x0, d0
71 ; CHECK-NEXT: ret
72 %b = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> %a)
73 ret i64 %b
74 }
75
76 define i128 @test_v1i128(<1 x i128> %a) nounwind {
77 ; CHECK-LABEL: test_v1i128:
78 ; CHECK: // %bb.0:
79 ; CHECK-NEXT: ret
80 %b = call i128 @llvm.experimental.vector.reduce.and.i128.v1i128(<1 x i128> %a)
81 ret i128 %b
82 }
83
84 define i8 @test_v3i8(<3 x i8> %a) nounwind {
85 ; CHECK-LABEL: test_v3i8:
86 ; CHECK: // %bb.0:
87 ; CHECK-NEXT: and w8, w0, w1
88 ; CHECK-NEXT: and w8, w8, w2
89 ; CHECK-NEXT: and w0, w8, #0xff
90 ; CHECK-NEXT: ret
91 %b = call i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8> %a)
92 ret i8 %b
93 }
94
95 define i8 @test_v9i8(<9 x i8> %a) nounwind {
96 ; CHECK-LABEL: test_v9i8:
97 ; CHECK: // %bb.0:
98 ; CHECK-NEXT: mov w8, #-1
99 ; CHECK-NEXT: mov v0.b[9], w8
100 ; CHECK-NEXT: mov v0.b[10], w8
101 ; CHECK-NEXT: mov v0.b[11], w8
102 ; CHECK-NEXT: mov v0.b[12], w8
103 ; CHECK-NEXT: mov v0.b[13], w8
104 ; CHECK-NEXT: mov v0.b[14], w8
105 ; CHECK-NEXT: mov v0.b[15], w8
106 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
107 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
108 ; CHECK-NEXT: umov w8, v0.b[1]
109 ; CHECK-NEXT: umov w9, v0.b[0]
110 ; CHECK-NEXT: and w8, w9, w8
111 ; CHECK-NEXT: umov w9, v0.b[2]
112 ; CHECK-NEXT: and w8, w8, w9
113 ; CHECK-NEXT: umov w9, v0.b[3]
114 ; CHECK-NEXT: and w8, w8, w9
115 ; CHECK-NEXT: umov w9, v0.b[4]
116 ; CHECK-NEXT: and w8, w8, w9
117 ; CHECK-NEXT: umov w9, v0.b[5]
118 ; CHECK-NEXT: and w8, w8, w9
119 ; CHECK-NEXT: umov w9, v0.b[6]
120 ; CHECK-NEXT: and w8, w8, w9
121 ; CHECK-NEXT: umov w9, v0.b[7]
122 ; CHECK-NEXT: and w0, w8, w9
123 ; CHECK-NEXT: ret
124 %b = call i8 @llvm.experimental.vector.reduce.and.i8.v9i8(<9 x i8> %a)
125 ret i8 %b
126 }
127
128 define i32 @test_v3i32(<3 x i32> %a) nounwind {
129 ; CHECK-LABEL: test_v3i32:
130 ; CHECK: // %bb.0:
131 ; CHECK-NEXT: mov w8, #-1
132 ; CHECK-NEXT: mov v0.s[3], w8
133 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
134 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
135 ; CHECK-NEXT: mov w8, v0.s[1]
136 ; CHECK-NEXT: fmov w9, s0
137 ; CHECK-NEXT: and w0, w9, w8
138 ; CHECK-NEXT: ret
139 %b = call i32 @llvm.experimental.vector.reduce.and.i32.v3i32(<3 x i32> %a)
140 ret i32 %b
141 }
142
143 define i1 @test_v4i1(<4 x i1> %a) nounwind {
144 ; CHECK-LABEL: test_v4i1:
145 ; CHECK: // %bb.0:
146 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
147 ; CHECK-NEXT: umov w10, v0.h[1]
148 ; CHECK-NEXT: umov w11, v0.h[0]
149 ; CHECK-NEXT: umov w9, v0.h[2]
150 ; CHECK-NEXT: and w10, w11, w10
151 ; CHECK-NEXT: umov w8, v0.h[3]
152 ; CHECK-NEXT: and w9, w10, w9
153 ; CHECK-NEXT: and w8, w9, w8
154 ; CHECK-NEXT: and w0, w8, #0x1
155 ; CHECK-NEXT: ret
156 %b = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> %a)
157 ret i1 %b
158 }
159
160 define i24 @test_v4i24(<4 x i24> %a) nounwind {
161 ; CHECK-LABEL: test_v4i24:
162 ; CHECK: // %bb.0:
163 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
164 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
165 ; CHECK-NEXT: mov w8, v0.s[1]
166 ; CHECK-NEXT: fmov w9, s0
167 ; CHECK-NEXT: and w0, w9, w8
168 ; CHECK-NEXT: ret
169 %b = call i24 @llvm.experimental.vector.reduce.and.i24.v4i24(<4 x i24> %a)
170 ret i24 %b
171 }
172
173 define i128 @test_v2i128(<2 x i128> %a) nounwind {
174 ; CHECK-LABEL: test_v2i128:
175 ; CHECK: // %bb.0:
176 ; CHECK-NEXT: and x0, x0, x2
177 ; CHECK-NEXT: and x1, x1, x3
178 ; CHECK-NEXT: ret
179 %b = call i128 @llvm.experimental.vector.reduce.and.i128.v2i128(<2 x i128> %a)
180 ret i128 %b
181 }
182
183 define i32 @test_v16i32(<16 x i32> %a) nounwind {
184 ; CHECK-LABEL: test_v16i32:
185 ; CHECK: // %bb.0:
186 ; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
187 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
188 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
189 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
190 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
191 ; CHECK-NEXT: mov w8, v0.s[1]
192 ; CHECK-NEXT: fmov w9, s0
193 ; CHECK-NEXT: and w0, w9, w8
194 ; CHECK-NEXT: ret
195 %b = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> %a)
196 ret i32 %b
197 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
2
3 declare half @llvm.experimental.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
4 declare float @llvm.experimental.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
5 declare double @llvm.experimental.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
6 declare fp128 @llvm.experimental.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)
7
8 declare float @llvm.experimental.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
9 declare fp128 @llvm.experimental.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
10 declare float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
11
12 define half @test_v1f16(<1 x half> %a) nounwind {
13 ; CHECK-LABEL: test_v1f16:
14 ; CHECK: // %bb.0:
15 ; CHECK-NEXT: ret
16 %b = call fast nnan half @llvm.experimental.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
17 ret half %b
18 }
19
20 define float @test_v1f32(<1 x float> %a) nounwind {
21 ; CHECK-LABEL: test_v1f32:
22 ; CHECK: // %bb.0:
23 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
24 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
25 ; CHECK-NEXT: ret
26 %b = call fast nnan float @llvm.experimental.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
27 ret float %b
28 }
29
30 define double @test_v1f64(<1 x double> %a) nounwind {
31 ; CHECK-LABEL: test_v1f64:
32 ; CHECK: // %bb.0:
33 ; CHECK-NEXT: ret
34 %b = call fast nnan double @llvm.experimental.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
35 ret double %b
36 }
37
38 define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
39 ; CHECK-LABEL: test_v1f128:
40 ; CHECK: // %bb.0:
41 ; CHECK-NEXT: ret
42 %b = call fast nnan fp128 @llvm.experimental.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
43 ret fp128 %b
44 }
45
46 define float @test_v3f32(<3 x float> %a) nounwind {
47 ; CHECK-LABEL: test_v3f32:
48 ; CHECK: // %bb.0:
49 ; CHECK-NEXT: fmov s1, wzr
50 ; CHECK-NEXT: mov v0.s[3], v1.s[0]
51 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
52 ; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
53 ; CHECK-NEXT: faddp s0, v0.2s
54 ; CHECK-NEXT: ret
55 %b = call fast nnan float @llvm.experimental.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
56 ret float %b
57 }
58
59 define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
60 ; CHECK-LABEL: test_v2f128:
61 ; CHECK: // %bb.0:
62 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
63 ; CHECK-NEXT: bl __addtf3
64 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
65 ; CHECK-NEXT: ret
66 %b = call fast nnan fp128 @llvm.experimental.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
67 ret fp128 %b
68 }
69
70 define float @test_v16f32(<16 x float> %a) nounwind {
71 ; CHECK-LABEL: test_v16f32:
72 ; CHECK: // %bb.0:
73 ; CHECK-NEXT: fadd v1.4s, v1.4s, v3.4s
74 ; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
75 ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
76 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
77 ; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
78 ; CHECK-NEXT: faddp s0, v0.2s
79 ; CHECK-NEXT: ret
80 %b = call fast nnan float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
81 ret float %b
82 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
2
3 declare half @llvm.experimental.vector.reduce.fmax.f16.v1f16(<1 x half> %a)
4 declare float @llvm.experimental.vector.reduce.fmax.f32.v1f32(<1 x float> %a)
5 declare double @llvm.experimental.vector.reduce.fmax.f64.v1f64(<1 x double> %a)
6 declare fp128 @llvm.experimental.vector.reduce.fmax.f128.v1f128(<1 x fp128> %a)
7
8 declare float @llvm.experimental.vector.reduce.fmax.f32.v3f32(<3 x float> %a)
9 declare fp128 @llvm.experimental.vector.reduce.fmax.f128.v2f128(<2 x fp128> %a)
10 declare float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a)
11
12 define half @test_v1f16(<1 x half> %a) nounwind {
13 ; CHECK-LABEL: test_v1f16:
14 ; CHECK: // %bb.0:
15 ; CHECK-NEXT: ret
16 %b = call nnan half @llvm.experimental.vector.reduce.fmax.f16.v1f16(<1 x half> %a)
17 ret half %b
18 }
19
20 define float @test_v1f32(<1 x float> %a) nounwind {
21 ; CHECK-LABEL: test_v1f32:
22 ; CHECK: // %bb.0:
23 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
24 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
25 ; CHECK-NEXT: ret
26 %b = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v1f32(<1 x float> %a)
27 ret float %b
28 }
29
30 define double @test_v1f64(<1 x double> %a) nounwind {
31 ; CHECK-LABEL: test_v1f64:
32 ; CHECK: // %bb.0:
33 ; CHECK-NEXT: ret
34 %b = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v1f64(<1 x double> %a)
35 ret double %b
36 }
37
38 define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
39 ; CHECK-LABEL: test_v1f128:
40 ; CHECK: // %bb.0:
41 ; CHECK-NEXT: ret
42 %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.f128.v1f128(<1 x fp128> %a)
43 ret fp128 %b
44 }
45
46 define float @test_v3f32(<3 x float> %a) nounwind {
47 ; CHECK-LABEL: test_v3f32:
48 ; CHECK: // %bb.0:
49 ; CHECK-NEXT: orr w8, wzr, #0x7f800000
50 ; CHECK-NEXT: fmov s1, w8
51 ; CHECK-NEXT: mov v0.s[3], v1.s[0]
52 ; CHECK-NEXT: fmaxnmv s0, v0.4s
53 ; CHECK-NEXT: ret
54 %b = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v3f32(<3 x float> %a)
55 ret float %b
56 }
57
58 define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
59 ; CHECK-LABEL: test_v2f128:
60 ; CHECK: // %bb.0:
61 ; CHECK-NEXT: b fmaxl
62 %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.f128.v2f128(<2 x fp128> %a)
63 ret fp128 %b
64 }
65
66 define float @test_v16f32(<16 x float> %a) nounwind {
67 ; CHECK-LABEL: test_v16f32:
68 ; CHECK: // %bb.0:
69 ; CHECK-NEXT: fmaxnm v1.4s, v1.4s, v3.4s
70 ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v2.4s
71 ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
72 ; CHECK-NEXT: fmaxnmv s0, v0.4s
73 ; CHECK-NEXT: ret
74 %b = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a)
75 ret float %b
76 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
2
3 declare i1 @llvm.experimental.vector.reduce.umax.i1.v1i1(<1 x i1> %a)
4 declare i8 @llvm.experimental.vector.reduce.umax.i8.v1i8(<1 x i8> %a)
5 declare i16 @llvm.experimental.vector.reduce.umax.i16.v1i16(<1 x i16> %a)
6 declare i24 @llvm.experimental.vector.reduce.umax.i24.v1i24(<1 x i24> %a)
7 declare i32 @llvm.experimental.vector.reduce.umax.i32.v1i32(<1 x i32> %a)
8 declare i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> %a)
9 declare i128 @llvm.experimental.vector.reduce.umax.i128.v1i128(<1 x i128> %a)
10
11 declare i8 @llvm.experimental.vector.reduce.umax.i8.v3i8(<3 x i8> %a)
12 declare i8 @llvm.experimental.vector.reduce.umax.i8.v9i8(<9 x i8> %a)
13 declare i32 @llvm.experimental.vector.reduce.umax.i32.v3i32(<3 x i32> %a)
14 declare i1 @llvm.experimental.vector.reduce.umax.i1.v4i1(<4 x i1> %a)
15 declare i24 @llvm.experimental.vector.reduce.umax.i24.v4i24(<4 x i24> %a)
16 declare i128 @llvm.experimental.vector.reduce.umax.i128.v2i128(<2 x i128> %a)
17 declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %a)
18
19 define i1 @test_v1i1(<1 x i1> %a) nounwind {
20 ; CHECK-LABEL: test_v1i1:
21 ; CHECK: // %bb.0:
22 ; CHECK-NEXT: and w0, w0, #0x1
23 ; CHECK-NEXT: ret
24 %b = call i1 @llvm.experimental.vector.reduce.umax.i1.v1i1(<1 x i1> %a)
25 ret i1 %b
26 }
27
28 define i8 @test_v1i8(<1 x i8> %a) nounwind {
29 ; CHECK-LABEL: test_v1i8:
30 ; CHECK: // %bb.0:
31 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
32 ; CHECK-NEXT: umov w0, v0.b[0]
33 ; CHECK-NEXT: ret
34 %b = call i8 @llvm.experimental.vector.reduce.umax.i8.v1i8(<1 x i8> %a)
35 ret i8 %b
36 }
37
38 define i16 @test_v1i16(<1 x i16> %a) nounwind {
39 ; CHECK-LABEL: test_v1i16:
40 ; CHECK: // %bb.0:
41 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
42 ; CHECK-NEXT: umov w0, v0.h[0]
43 ; CHECK-NEXT: ret
44 %b = call i16 @llvm.experimental.vector.reduce.umax.i16.v1i16(<1 x i16> %a)
45 ret i16 %b
46 }
47
48 define i24 @test_v1i24(<1 x i24> %a) nounwind {
49 ; CHECK-LABEL: test_v1i24:
50 ; CHECK: // %bb.0:
51 ; CHECK-NEXT: ret
52 %b = call i24 @llvm.experimental.vector.reduce.umax.i24.v1i24(<1 x i24> %a)
53 ret i24 %b
54 }
55
56 define i32 @test_v1i32(<1 x i32> %a) nounwind {
57 ; CHECK-LABEL: test_v1i32:
58 ; CHECK: // %bb.0:
59 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
60 ; CHECK-NEXT: fmov w0, s0
61 ; CHECK-NEXT: ret
62 %b = call i32 @llvm.experimental.vector.reduce.umax.i32.v1i32(<1 x i32> %a)
63 ret i32 %b
64 }
65
66 define i64 @test_v1i64(<1 x i64> %a) nounwind {
67 ; CHECK-LABEL: test_v1i64:
68 ; CHECK: // %bb.0:
69 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
70 ; CHECK-NEXT: fmov x0, d0
71 ; CHECK-NEXT: ret
72 %b = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> %a)
73 ret i64 %b
74 }
75
76 define i128 @test_v1i128(<1 x i128> %a) nounwind {
77 ; CHECK-LABEL: test_v1i128:
78 ; CHECK: // %bb.0:
79 ; CHECK-NEXT: ret
80 %b = call i128 @llvm.experimental.vector.reduce.umax.i128.v1i128(<1 x i128> %a)
81 ret i128 %b
82 }
83
84 define i8 @test_v3i8(<3 x i8> %a) nounwind {
85 ; CHECK-LABEL: test_v3i8:
86 ; CHECK: // %bb.0:
87 ; CHECK-NEXT: movi d0, #0000000000000000
88 ; CHECK-NEXT: mov v0.h[0], w0
89 ; CHECK-NEXT: mov v0.h[1], w1
90 ; CHECK-NEXT: mov v0.h[2], w2
91 ; CHECK-NEXT: bic v0.4h, #255, lsl #8
92 ; CHECK-NEXT: umaxv h0, v0.4h
93 ; CHECK-NEXT: fmov w0, s0
94 ; CHECK-NEXT: ret
95 %b = call i8 @llvm.experimental.vector.reduce.umax.i8.v3i8(<3 x i8> %a)
96 ret i8 %b
97 }
98
99 define i8 @test_v9i8(<9 x i8> %a) nounwind {
100 ; CHECK-LABEL: test_v9i8:
101 ; CHECK: // %bb.0:
102 ; CHECK-NEXT: mov v0.b[9], wzr
103 ; CHECK-NEXT: mov v0.b[10], wzr
104 ; CHECK-NEXT: mov v0.b[11], wzr
105 ; CHECK-NEXT: mov v0.b[12], wzr
106 ; CHECK-NEXT: mov v0.b[13], wzr
107 ; CHECK-NEXT: mov v0.b[14], wzr
108 ; CHECK-NEXT: mov v0.b[15], wzr
109 ; CHECK-NEXT: umaxv b0, v0.16b
110 ; CHECK-NEXT: fmov w0, s0
111 ; CHECK-NEXT: ret
112 %b = call i8 @llvm.experimental.vector.reduce.umax.i8.v9i8(<9 x i8> %a)
113 ret i8 %b
114 }
115
116 define i32 @test_v3i32(<3 x i32> %a) nounwind {
117 ; CHECK-LABEL: test_v3i32:
118 ; CHECK: // %bb.0:
119 ; CHECK-NEXT: mov v0.s[3], wzr
120 ; CHECK-NEXT: umaxv s0, v0.4s
121 ; CHECK-NEXT: fmov w0, s0
122 ; CHECK-NEXT: ret
123 %b = call i32 @llvm.experimental.vector.reduce.umax.i32.v3i32(<3 x i32> %a)
124 ret i32 %b
125 }
126
127 define i1 @test_v4i1(<4 x i1> %a) nounwind {
128 ; CHECK-LABEL: test_v4i1:
129 ; CHECK: // %bb.0:
130 ; CHECK-NEXT: movi v1.4h, #1
131 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
132 ; CHECK-NEXT: umaxv h0, v0.4h
133 ; CHECK-NEXT: fmov w8, s0
134 ; CHECK-NEXT: and w0, w8, #0x1
135 ; CHECK-NEXT: ret
136 %b = call i1 @llvm.experimental.vector.reduce.umax.i1.v4i1(<4 x i1> %a)
137 ret i1 %b
138 }
139
140 define i24 @test_v4i24(<4 x i24> %a) nounwind {
141 ; CHECK-LABEL: test_v4i24:
142 ; CHECK: // %bb.0:
143 ; CHECK-NEXT: bic v0.4s, #255, lsl #24
144 ; CHECK-NEXT: umaxv s0, v0.4s
145 ; CHECK-NEXT: fmov w0, s0
146 ; CHECK-NEXT: ret
147 %b = call i24 @llvm.experimental.vector.reduce.umax.i24.v4i24(<4 x i24> %a)
148 ret i24 %b
149 }
150
151 define i128 @test_v2i128(<2 x i128> %a) nounwind {
152 ; CHECK-LABEL: test_v2i128:
153 ; CHECK: // %bb.0:
154 ; CHECK-NEXT: cmp x0, x2
155 ; CHECK-NEXT: csel x8, x0, x2, hi
156 ; CHECK-NEXT: cmp x1, x3
157 ; CHECK-NEXT: csel x9, x0, x2, hi
158 ; CHECK-NEXT: csel x0, x8, x9, eq
159 ; CHECK-NEXT: csel x1, x1, x3, hi
160 ; CHECK-NEXT: ret
161 %b = call i128 @llvm.experimental.vector.reduce.umax.i128.v2i128(<2 x i128> %a)
162 ret i128 %b
163 }
164
165 define i32 @test_v16i32(<16 x i32> %a) nounwind {
166 ; CHECK-LABEL: test_v16i32:
167 ; CHECK: // %bb.0:
168 ; CHECK-NEXT: umax v1.4s, v1.4s, v3.4s
169 ; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
170 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
171 ; CHECK-NEXT: umaxv s0, v0.4s
172 ; CHECK-NEXT: fmov w0, s0
173 ; CHECK-NEXT: ret
174 %b = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %a)
175 ret i32 %b
176 }