llvm.org GIT mirror llvm / 15c5be1
Revert r248483, r242546, r242545, and r242409 - absdiff intrinsics After much discussion, ending here: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151123/315620.html it has been decided that, instead of having the vectorizer directly generate special absdiff and horizontal-add intrinsics, we'll recognize the relevant reduction patterns during CodeGen. Accordingly, these intrinsics are not needed (the operations they represent can be pattern matched, as is already done in some backends). Thus, we're backing these out in favor of the current development work. r248483 - Codegen: Fix llvm.*absdiff semantic. r242546 - [ARM] Use [SU]ABSDIFF nodes instead of intrinsics for VABD/VABA r242545 - [AArch64] Use [SU]ABSDIFF nodes instead of intrinsics for ABD/ABA r242409 - [Codegen] Add intrinsics 'absdiff' and corresponding SDNodes for absolute difference operation git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255387 91177308-0d34-0410-b5e6-96231b3b80d8 Hal Finkel 4 years ago
16 changed file(s) with 36 addition(s) and 400 deletion(s). Raw diff Collapse all Expand all
1121611216
1121711217 %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c
1121811218
11219
11220 '``llvm.uabsdiff.*``' and '``llvm.sabsdiff.*``' Intrinsics
11221 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
11222
11223 Syntax:
11224 """""""
11225 This is an overloaded intrinsic. The arguments and the result are vectors of any integer bit width.
11226
11227 .. code-block:: llvm
11228
11229 declare <4 x integer> @llvm.uabsdiff.v4i32(<4 x integer> %a, <4 x integer> %b)
11230
11231
11232 Overview:
11233 """""""""
11234
11235 The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference
11236 of the two operands, treating them both as unsigned integers. The intermediate
11237 calculations are computed using infinitely precise unsigned arithmetic. The final
11238 result will be truncated to the given type.
11239
11240 The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of
11241 the two operands, treating them both as signed integers. If the result overflows, the
11242 behavior is undefined.
11243
11244 .. note::
11245
11246 These intrinsics are primarily used during the code generation stage of compilation.
11247 They are generated by compiler passes such as the Loop and SLP vectorizers. It is not
11248 recommended for users to create them manually.
11249
11250 Arguments:
11251 """"""""""
11252
11253 Both intrinsics take two integer vector arguments of the same type and bitwidth.
11254
11255 Semantics:
11256 """"""""""
11257
11258 The expression::
11259
11260 call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
11261
11262 is equivalent to::
11263
11264 %1 = zext <4 x i32> %a to <4 x i64>
11265 %2 = zext <4 x i32> %b to <4 x i64>
11266 %sub = sub <4 x i64> %1, %2
11267 %ispos = icmp sge <4 x i64> %sub, zeroinitializer
11268 %neg = sub <4 x i64> zeroinitializer, %sub
11269 %abs = select <4 x i1> %ispos, <4 x i64> %sub, <4 x i64> %neg
11270 %trunc = trunc <4 x i64> %abs to <4 x i32>
11268
11269 and the expression::
11270
11271 call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
11272
11273 is equivalent to::
11274
11275 %sub = sub nsw <4 x i32> %a, %b
11276 %ispos = icmp sge <4 x i32> %sub, zeroinitializer
11277 %neg = sub nsw <4 x i32> zeroinitializer, %sub
11278 %1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
11279
11280
1128111219 Half Precision Floating Point Intrinsics
1128211220 ----------------------------------------
1128311221
336336
337337 /// Byte Swap and Counting operators.
338338 BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE,
339
340 /// [SU]ABSDIFF - Signed/Unsigned absolute difference of two input integer
341 /// vector. These nodes are generated from llvm.*absdiff* intrinsics.
342 SABSDIFF, UABSDIFF,
343339
344340 /// Bit counting operators with an undefined result for zero inputs.
345341 CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
630630 def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
631631 [], "llvm.clear_cache">;
632632
633 // Calculate the Absolute Differences of the two input vectors.
634 def int_sabsdiff : Intrinsic<[llvm_anyvector_ty],
635 [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
636 def int_uabsdiff : Intrinsic<[llvm_anyvector_ty],
637 [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
638
639633 //===-------------------------- Masked Intrinsics -------------------------===//
640634 //
641635 def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
395395 def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>;
396396 def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>;
397397
398 def sabsdiff : SDNode<"ISD::SABSDIFF" , SDTIntBinOp>;
399 def uabsdiff : SDNode<"ISD::UABSDIFF" , SDTIntBinOp>;
400398 def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
401399 def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
402400 def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
146146 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
147147 Res = PromoteIntRes_AtomicCmpSwap(cast(N), ResNo);
148148 break;
149 case ISD::UABSDIFF:
150 case ISD::SABSDIFF:
151 Res = PromoteIntRes_SimpleIntBinOp(N);
152 break;
153149 }
154150
155151 // If the result is null then the sub-method took care of registering it.
104104 SDValue ExpandLoad(SDValue Op);
105105 SDValue ExpandStore(SDValue Op);
106106 SDValue ExpandFNEG(SDValue Op);
107 SDValue ExpandABSDIFF(SDValue Op);
108107
109108 /// \brief Implements vector promotion.
110109 ///
329328 case ISD::SMAX:
330329 case ISD::UMIN:
331330 case ISD::UMAX:
332 case ISD::UABSDIFF:
333 case ISD::SABSDIFF:
334331 QueryType = Node->getValueType(0);
335332 break;
336333 case ISD::FP_ROUND_INREG:
717714 return ExpandFNEG(Op);
718715 case ISD::SETCC:
719716 return UnrollVSETCC(Op);
720 case ISD::UABSDIFF:
721 case ISD::SABSDIFF:
722 return ExpandABSDIFF(Op);
723717 default:
724718 return DAG.UnrollVectorOp(Op.getNode());
725719 }
726 }
727
728 SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) {
729 SDLoc dl(Op);
730 SDValue Op0 = Op.getOperand(0);
731 SDValue Op1 = Op.getOperand(1);
732 EVT VT = Op.getValueType();
733
734 // For unsigned intrinsic, promote the type to handle unsigned overflow.
735 bool isUabsdiff = (Op->getOpcode() == ISD::UABSDIFF);
736 if (isUabsdiff) {
737 VT = VT.widenIntegerVectorElementType(*DAG.getContext());
738 Op0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op0);
739 Op1 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op1);
740 }
741
742 SDNodeFlags Flags;
743 Flags.setNoSignedWrap(!isUabsdiff);
744 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op0, Op1, &Flags);
745 if (isUabsdiff)
746 return DAG.getNode(ISD::TRUNCATE, dl, Op.getValueType(), Sub);
747
748 SDValue Cmp =
749 DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(),
750 *DAG.getContext(), VT),
751 Sub, DAG.getConstant(0, dl, VT), DAG.getCondCode(ISD::SETGE));
752 SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Sub, &Flags);
753 return DAG.getNode(ISD::VSELECT, dl, VT, Cmp, Sub, Neg);
754720 }
755721
756722 SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
683683 case ISD::SMAX:
684684 case ISD::UMIN:
685685 case ISD::UMAX:
686 case ISD::UABSDIFF:
687 case ISD::SABSDIFF:
688686 SplitVecRes_BinOp(N, Lo, Hi);
689687 break;
690688 case ISD::FMA:
48784878 setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
48794879 getValue(I.getArgOperand(0)).getValueType(),
48804880 getValue(I.getArgOperand(0))));
4881 return nullptr;
4882 case Intrinsic::uabsdiff:
4883 setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl,
4884 getValue(I.getArgOperand(0)).getValueType(),
4885 getValue(I.getArgOperand(0)),
4886 getValue(I.getArgOperand(1))));
4887 return nullptr;
4888 case Intrinsic::sabsdiff:
4889 setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl,
4890 getValue(I.getArgOperand(0)).getValueType(),
4891 getValue(I.getArgOperand(0)),
4892 getValue(I.getArgOperand(1))));
48934881 return nullptr;
48944882 case Intrinsic::cttz: {
48954883 SDValue Arg = getValue(I.getArgOperand(0));
234234 case ISD::SHL_PARTS: return "shl_parts";
235235 case ISD::SRA_PARTS: return "sra_parts";
236236 case ISD::SRL_PARTS: return "srl_parts";
237 case ISD::UABSDIFF: return "uabsdiff";
238 case ISD::SABSDIFF: return "sabsdiff";
239237
240238 // Conversion operators.
241239 case ISD::SIGN_EXTEND: return "sign_extend";
825825 setOperationAction(ISD::USUBO, VT, Expand);
826826 setOperationAction(ISD::SMULO, VT, Expand);
827827 setOperationAction(ISD::UMULO, VT, Expand);
828 setOperationAction(ISD::UABSDIFF, VT, Expand);
829 setOperationAction(ISD::SABSDIFF, VT, Expand);
828
830829 setOperationAction(ISD::BITREVERSE, VT, Expand);
831830
832831 // These library functions default to expand.
690690 setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
691691 setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
692692
693 // [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from
694 // i64.
693 // [SU][MIN|MAX] are available for all NEON types apart from i64.
695694 if (!VT.isFloatingPoint() &&
696695 VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
697 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
698 ISD::SABSDIFF, ISD::UABSDIFF})
696 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
699697 setOperationAction(Opcode, VT.getSimpleVT(), Legal);
700698
701699 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
82508248 // (aarch64_neon_umull (extract_high (v2i64 vec)))
82518249 // (extract_high (v2i64 (dup128 scalar)))))
82528250 //
8253 static SDValue tryCombineLongOpWithDup(SDNode *N,
8251 static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
82548252 TargetLowering::DAGCombinerInfo &DCI,
82558253 SelectionDAG &DAG) {
82568254 if (DCI.isBeforeLegalizeOps())
82578255 return SDValue();
82588256
8259 bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
8260 SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0);
8261 SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1);
8257 SDValue LHS = N->getOperand(1);
8258 SDValue RHS = N->getOperand(2);
82628259 assert(LHS.getValueType().is64BitVector() &&
82638260 RHS.getValueType().is64BitVector() &&
82648261 "unexpected shape for long operation");
82768273 return SDValue();
82778274 }
82788275
8279 // N could either be an intrinsic or a sabsdiff/uabsdiff node.
8280 if (IsIntrinsic)
8281 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
8282 N->getOperand(0), LHS, RHS);
8283 else
8284 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
8285 LHS, RHS);
8276 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
8277 N->getOperand(0), LHS, RHS);
82868278 }
82878279
82888280 static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
84008392 case Intrinsic::aarch64_neon_fmin:
84018393 return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
84028394 N->getOperand(1), N->getOperand(2));
8403 case Intrinsic::aarch64_neon_sabd:
8404 return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
8405 N->getOperand(1), N->getOperand(2));
8406 case Intrinsic::aarch64_neon_uabd:
8407 return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
8408 N->getOperand(1), N->getOperand(2));
84098395 case Intrinsic::aarch64_neon_fmaxnm:
84108396 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
84118397 N->getOperand(1), N->getOperand(2));
84168402 case Intrinsic::aarch64_neon_umull:
84178403 case Intrinsic::aarch64_neon_pmull:
84188404 case Intrinsic::aarch64_neon_sqdmull:
8419 return tryCombineLongOpWithDup(N, DCI, DAG);
8405 return tryCombineLongOpWithDup(IID, N, DCI, DAG);
84208406 case Intrinsic::aarch64_neon_sqshl:
84218407 case Intrinsic::aarch64_neon_uqshl:
84228408 case Intrinsic::aarch64_neon_sqshlu:
84418427 // helps the backend to decide that an sabdl2 would be useful, saving a real
84428428 // extract_high operation.
84438429 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
8444 (N->getOperand(0).getOpcode() == ISD::SABSDIFF ||
8445 N->getOperand(0).getOpcode() == ISD::UABSDIFF)) {
8430 N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
84468431 SDNode *ABDNode = N->getOperand(0).getNode();
8447 SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG);
8448 if (!NewABD.getNode())
8449 return SDValue();
8450
8451 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
8452 NewABD);
8432 unsigned IID = getIntrinsicID(ABDNode);
8433 if (IID == Intrinsic::aarch64_neon_sabd ||
8434 IID == Intrinsic::aarch64_neon_uabd) {
8435 SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
8436 if (!NewABD.getNode())
8437 return SDValue();
8438
8439 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
8440 NewABD);
8441 }
84538442 }
84548443
84558444 // This is effectively a custom type legalization for AArch64.
26322632 //===----------------------------------------------------------------------===//
26332633
26342634 defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
2635 uabsdiff>;
2635 int_aarch64_neon_uabd>;
26362636 // Match UABDL in log2-shuffle patterns.
26372637 def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
26382638 (v8i16 (add (sub (zext (v8i8 V64:$opA)),
29042904 defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
29052905 defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
29062906 defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
2907 TriOpFrag<(add node:$LHS, (sabsdiff node:$MHS, node:$RHS))> >;
2908 defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", sabsdiff>;
2907 TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
2908 defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
29092909 defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
29102910 defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
29112911 defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
29232923 defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
29242924 defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
29252925 defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
2926 TriOpFrag<(add node:$LHS, (uabsdiff node:$MHS, node:$RHS))> >;
2927 defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", uabsdiff>;
2926 TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
2927 defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
29282928 defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
29292929 defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
29302930 defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
34263426 defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
34273427 defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
34283428 defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
3429 sabsdiff>;
3429 int_aarch64_neon_sabd>;
34303430 defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
3431 sabsdiff>;
3431 int_aarch64_neon_sabd>;
34323432 defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
34333433 BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
34343434 defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
34493449 defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
34503450 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
34513451 defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
3452 uabsdiff>;
3452 int_aarch64_neon_uabd>;
34533453 defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
34543454 BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
34553455 defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
142142 setOperationAction(ISD::UREM, VT, Expand);
143143 setOperationAction(ISD::FREM, VT, Expand);
144144
145 if (VT.isInteger()) {
146 setOperationAction(ISD::SABSDIFF, VT, Legal);
147 setOperationAction(ISD::UABSDIFF, VT, Legal);
148 }
149145 if (!VT.isFloatingPoint() &&
150146 VT != MVT::v2i64 && VT != MVT::v1i64)
151147 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
152148 setOperationAction(Opcode, VT, Legal);
153
154149 }
155150
156151 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
1014710142 // Don't do anything for most intrinsics.
1014810143 break;
1014910144
10150 case Intrinsic::arm_neon_vabds:
10151 if (!N->getValueType(0).isInteger())
10152 return SDValue();
10153 return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
10154 N->getOperand(1), N->getOperand(2));
10155 case Intrinsic::arm_neon_vabdu:
10156 return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
10157 N->getOperand(1), N->getOperand(2));
10158
1015910145 // Vector shifts: check for immediate versions and lower them.
1016010146 // Note: This is done during DAG combining instead of DAG legalizing because
1016110147 // the build_vectors for 64-bit vector element shift counts are generally
49934993 // VABD : Vector Absolute Difference
49944994 defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
49954995 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4996 "vabd", "s", sabsdiff, 1>;
4996 "vabd", "s", int_arm_neon_vabds, 1>;
49974997 defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
49984998 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4999 "vabd", "u", uabsdiff, 1>;
4999 "vabd", "u", int_arm_neon_vabdu, 1>;
50005000 def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
50015001 "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
50025002 def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
50045004
50055005 // VABDL : Vector Absolute Difference Long (Q = | D - D |)
50065006 defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
5007 "vabdl", "s", sabsdiff, zext, 1>;
5007 "vabdl", "s", int_arm_neon_vabds, zext, 1>;
50085008 defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
5009 "vabdl", "u", uabsdiff, zext, 1>;
5009 "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
50105010
50115011 def abd_shr :
50125012 PatFrag<(ops node:$in1, node:$in2, node:$shift),
50335033
50345034 // VABA : Vector Absolute Difference and Accumulate
50355035 defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5036 "vaba", "s", sabsdiff, add>;
5036 "vaba", "s", int_arm_neon_vabds, add>;
50375037 defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5038 "vaba", "u", uabsdiff, add>;
5038 "vaba", "u", int_arm_neon_vabdu, add>;
50395039
50405040 // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
50415041 defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
5042 "vabal", "s", sabsdiff, zext, add>;
5042 "vabal", "s", int_arm_neon_vabds, zext, add>;
50435043 defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
5044 "vabal", "u", uabsdiff, zext, add>;
5044 "vabal", "u", int_arm_neon_vabdu, zext, add>;
50455045
50465046 // Vector Maximum and Minimum.
50475047
+0
-181
test/CodeGen/X86/absdiff_128.ll less more
None ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
1
2 declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>)
3
4 define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
5 ; CHECK-LABEL: test_uabsdiff_v4i8_expand
6 ; CHECK: pshufd
7 ; CHECK: movd
8 ; CHECK: subl
9 ; CHECK: punpckldq
10 ; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
11 ; CHECK-DAG: movd %xmm0, [[DST:%.*]]
12 ; CHECK: subl [[SRC]], [[DST]]
13 ; CHECK: movd
14 ; CHECK: pshufd
15 ; CHECK: movd
16 ; CHECK: punpckldq
17 ; CHECK: movdqa
18 ; CHECK: retq
19
20 %1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
21 ret <4 x i8> %1
22 }
23
24 declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>)
25
26 define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
27 ; CHECK-LABEL: test_sabsdiff_v4i8_expand
28 ; CHECK: psubd
29 ; CHECK: pcmpgtd
30 ; CHECK: pcmpeqd
31 ; CHECK: pxor
32 ; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
33 ; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
34 ; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
35 ; CHECK: por [[SRC2]], [[DST]]
36 ; CHECK: retq
37
38 %1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
39 ret <4 x i8> %1
40 }
41
42 declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>)
43
44 define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) {
45 ; CHECK-LABEL: test_sabsdiff_v8i8_expand
46 ; CHECK: psubw
47 ; CHECK: pcmpgtw
48 ; CHECK: pcmpeqd
49 ; CHECK: pxor
50 ; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
51 ; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
52 ; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
53 ; CHECK: por [[SRC2]], [[DST]]
54 ; CHECK: retq
55
56 %1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2)
57 ret <8 x i8> %1
58 }
59
60 declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>)
61
62 define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) {
63 ; CHECK-LABEL: test_uabsdiff_v16i8_expand
64 ; CHECK: movd
65 ; CHECK: movzbl
66 ; CHECK: movzbl
67 ; CHECK: subl
68 ; CHECK: punpcklbw
69 ; CHECK: retq
70
71 %1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2)
72 ret <16 x i8> %1
73 }
74
75 declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>)
76
77 define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
78 ; CHECK-LABEL: test_uabsdiff_v8i16_expand
79 ; CHECK: pextrw
80 ; CHECK: pextrw
81 ; CHECK: subl
82 ; CHECK: punpcklwd
83 ; CHECK: retq
84
85 %1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
86 ret <8 x i16> %1
87 }
88
89 declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>)
90
91 define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
92 ; CHECK-LABEL: test_sabsdiff_v8i16_expand
93 ; CHECK: psubw
94 ; CHECK: pcmpgtw
95 ; CHECK: pcmpeqd
96 ; CHECK: pxor
97 ; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
98 ; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
99 ; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
100 ; CHECK: por [[SRC2]], [[DST]]
101 ; CHECK: retq
102
103 %1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
104 ret <8 x i16> %1
105 }
106
107 declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>)
108
109 define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
110 ; CHECK-LABEL: test_sabsdiff_v4i32_expand
111 ; CHECK: psubd
112 ; CHECK: pcmpgtd
113 ; CHECK: pcmpeqd
114 ; CHECK: pxor
115 ; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
116 ; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
117 ; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
118 ; CHECK: por [[SRC2]], [[DST]]
119 ; CHECK: retq
120 %1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
121 ret <4 x i32> %1
122 }
123
124 declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>)
125
126 define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
127 ; CHECK-LABEL: test_uabsdiff_v4i32_expand
128 ; CHECK: pshufd
129 ; CHECK: movd
130 ; CHECK: subl
131 ; CHECK: punpckldq
132 ; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
133 ; CHECK-DAG: movd %xmm0, [[DST:%.*]]
134 ; CHECK: subl [[SRC]], [[DST]]
135 ; CHECK: movd
136 ; CHECK: pshufd
137 ; CHECK: movd
138 ; CHECK: punpckldq
139 ; CHECK: movdqa
140 ; CHECK: retq
141
142 %1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
143 ret <4 x i32> %1
144 }
145
146 declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>)
147
148 define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) {
149 ; CHECK-LABEL: test_sabsdiff_v2i32_expand
150 ; CHECK: psubq
151 ; CHECK: pcmpgtd
152 ; CHECK: pcmpeqd
153 ; CHECK: pxor
154 ; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
155 ; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
156 ; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
157 ; CHECK: por [[SRC2]], [[DST]]
158 ; CHECK: retq
159
160 %1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2)
161 ret <2 x i32> %1
162 }
163
164 declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>)
165
166 define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) {
167 ; CHECK-LABEL: test_sabsdiff_v2i64_expand
168 ; CHECK: psubq
169 ; CHECK: pcmpgtd
170 ; CHECK: pcmpeqd
171 ; CHECK: pxor
172 ; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
173 ; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
174 ; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
175 ; CHECK: por [[SRC2]], [[DST]]
176 ; CHECK: retq
177
178 %1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2)
179 ret <2 x i64> %1
180 }
+0
-29
test/CodeGen/X86/absdiff_256.ll less more
None ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
1
2 declare <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16>, <16 x i16>)
3
4 define <16 x i16> @test_sabsdiff_v16i16_expand(<16 x i16> %a1, <16 x i16> %a2) {
5 ; CHECK-LABEL: test_sabsdiff_v16i16_expand:
6 ; CHECK: # BB#0:
7 ; CHECK: psubw
8 ; CHECK: pxor
9 ; CHECK: pcmpgtw
10 ; CHECK: movdqa
11 ; CHECK: pandn
12 ; CHECK: pxor
13 ; CHECK: psubw
14 ; CHECK: pcmpeqd
15 ; CHECK: pxor
16 ; CHECK: pandn
17 ; CHECK: por
18 ; CHECK: pcmpgtw
19 ; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC:%xmm[0-9]+]]
20 ; CHECK-DAG: pxor {{%xmm[0-9]+}}, [[DST:%xmm[0-9]+]]
21 ; CHECK: pandn [[SRC]], [[DST]]
22 ; CHECK: por
23 ; CHECK: movdqa
24 ; CHECK: retq
25 %1 = call <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16> %a1, <16 x i16> %a2)
26 ret <16 x i16> %1
27 }
28