llvm.org GIT mirror llvm / c9c8b2a
CellSPU:
- Rename fcmp.ll test to fcmp32.ll, start adding new double tests to fcmp64.ll
- Fix select_bits.ll test
- Capitulate to the DAGCombiner and move i64 constant loads to instruction selection (SPUISelDAGToDAG.cpp). <rant>DAGCombiner will insert all kinds of 64-bit optimizations after operation legalization occurs, and now we have to do most of the work that instruction selection should be doing twice: once to determine whether a v2i64 build_vector can be handled by SelectCode(), which then runs all of the predicates a second time to select the necessary instructions. But CellSPU is a good citizen.</rant>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62990 91177308-0d34-0410-b5e6-96231b3b80d8
Scott Michel, 11 years ago
12 changed file(s) with 578 addition(s) and 326 deletion(s).
2929 // selb instruction definition for i64. Note that the selection mask is
3030 // a vector, produced by various forms of FSM:
3131 def SELBr64_cond:
32 SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
33 [/* no pattern */]>;
32 SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
33 [/* no pattern */]>;
3434
3535 // The generic i64 select pattern, which assumes that the comparison result
3636 // is in a 32-bit register that contains a select mask pattern (i.e., gather
253253 /// getSmallIPtrImm - Return a target constant of pointer type.
254254 inline SDValue getSmallIPtrImm(unsigned Imm) {
255255 return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
256 }
256 }
257257
258258 SDNode *emitBuildVector(SDValue build_vec) {
259 MVT vecVT = build_vec.getValueType();
260 SDNode *bvNode = build_vec.getNode();
261 bool canBeSelected = false;
262
263 // Check to see if this vector can be represented as a CellSPU immediate
264 // constant.
265 if (vecVT == MVT::v8i16) {
266 if (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0) {
267 canBeSelected = true;
268 }
269 } else if (vecVT == MVT::v4i32) {
270 if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
271 || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
272 || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
273 || (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0)) {
274 canBeSelected = true;
275 }
276 } else if (vecVT == MVT::v2i64) {
277 if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)
278 || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)
279 || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)) {
280 canBeSelected = true;
281 }
282 }
283
284 if (canBeSelected) {
285 return Select(build_vec);
286 }
287
288 // No, need to emit a constant pool spill:
259289 std::vector<Constant*> CV;
260290
261291 for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
262 ConstantSDNode *V = dyn_cast<ConstantSDNode>(build_vec.getOperand(i));
263 CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
292 ConstantSDNode *V = dyn_cast<ConstantSDNode>(build_vec.getOperand(i));
293 CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
264294 }
265295
266296 Constant *CP = ConstantVector::get(CV);
267297 SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
268 unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
298 unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
269299 SDValue CGPoolOffset =
270300 SPU::LowerConstantPool(CPIdx, *CurDAG,
271301 SPUtli.getSPUTargetMachine());
272302 return SelectCode(CurDAG->getLoad(build_vec.getValueType(),
273 CurDAG->getEntryNode(), CGPoolOffset,
274 PseudoSourceValue::getConstantPool(), 0,
275 false, Alignment));
303 CurDAG->getEntryNode(), CGPoolOffset,
304 PseudoSourceValue::getConstantPool(), 0,
305 false, Alignment));
276306 }
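A note on the flow above: emitBuildVector only falls back to a constant-pool load when none of the immediate-form predicates (SPU::get_vec_i16imm, get_ILHUvec_imm, get_vec_u18imm, ...) match. A rough standalone sketch of what such a check amounts to for a splat value, under the simplifying assumption that IL takes a signed 16-bit immediate and ILA an unsigned 18-bit one (the real predicates are the authority):

#include <cstdint>

// Approximate model of the "canBeSelected" immediate checks (illustrative).
static bool splatFitsImmediate(int64_t splat) {
  bool fitsIL  = (splat >= -32768 && splat <= 32767);        // IL: signed 16-bit
  bool fitsILA = (static_cast<uint64_t>(splat) >> 18) == 0;  // ILA: unsigned 18-bit
  return fitsIL || fitsILA;
}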
277307
278308 /// Select - Convert the specified operand from a target-independent to a
287317
288318 //! Emit the instruction sequence for i64 sra
289319 SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
320
321 //! Emit the necessary sequence for loading i64 constants:
322 SDNode *SelectI64Constant(SDValue &Op, MVT OpVT);
290323
291324 //! Returns true if the address N is an A-form (local store) address
292325 bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
651684
652685 if (N->isMachineOpcode()) {
653686 return NULL; // Already selected.
654 } else if (Opc == ISD::FrameIndex) {
687 }
688
689 if (Opc == ISD::FrameIndex) {
655690 int FI = cast<FrameIndexSDNode>(N)->getIndex();
656691 SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
657692 SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
668703 TFI, Imm0), 0);
669704 n_ops = 2;
670705 }
706 } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
707 // Catch the i64 constants that end up here. Note: the backend doesn't
708 // attempt to legalize the constant (legalizing it is useless, because
709 // DAGCombiner will insert 64-bit constants and we can't stop it).
710 return SelectI64Constant(Op, OpVT);
671711 } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
672712 && OpVT == MVT::i64) {
673713 SDValue Op0 = Op.getOperand(0);
744784 return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
745785 Op.getOperand(0), Op.getOperand(1),
746786 SDValue(CGLoad, 0)));
747 } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
748 SDNode *CGLoad =
749 emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
750
751 return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
752 Op.getOperand(0), Op.getOperand(1),
753 SDValue(CGLoad, 0)));
754 } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
755 SDNode *CGLoad =
756 emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
757
758 return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
759 Op.getOperand(0), Op.getOperand(1),
760 SDValue(CGLoad, 0)));
761 } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
762 SDNode *CGLoad =
763 emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
764
765 return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
766 Op.getOperand(0), Op.getOperand(1),
767 SDValue(CGLoad, 0)));
787 } else if (Opc == ISD::TRUNCATE) {
788 SDValue Op0 = Op.getOperand(0);
789 if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
790 && OpVT == MVT::i32
791 && Op0.getValueType() == MVT::i64) {
792 // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32, to
793 // take advantage of the fact that the upper 32 bits are in the
794 // i32 preferred slot and avoid all kinds of other shuffle gymnastics:
795 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
796 if (CN != 0) {
797 unsigned shift_amt = unsigned(CN->getZExtValue());
798
799 if (shift_amt >= 32) {
800 SDNode *hi32 =
801 CurDAG->getTargetNode(SPU::ORr32_r64, OpVT, Op0.getOperand(0));
802
803 shift_amt -= 32;
804 if (shift_amt > 0) {
805 // Take care of the additional shift, if present:
806 SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
807 unsigned Opc = SPU::ROTMAIr32_i32;
808
809 if (Op0.getOpcode() == ISD::SRL)
810 Opc = SPU::ROTMr32;
811
812 hi32 = CurDAG->getTargetNode(Opc, OpVT, SDValue(hi32, 0), shift);
813 }
814
815 return hi32;
816 }
817 }
818 }
768819 } else if (Opc == ISD::SHL) {
769820 if (OpVT == MVT::i64) {
770821 return SelectSHLi64(Op, OpVT);
10451096 return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
10461097 }
10471098
1099 /*!
1100 Do the magic necessary to load an i64 constant
1101 */
1102 SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) {
1103 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
1104 MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
1105 SDValue i64vec =
1106 SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue());
1107
1108 // Here's where it gets interesting, because we have to parse out the
1109 // subtree handed back in i64vec:
1110
1111 if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
1112 // The degenerate case where the upper and lower bits in the splat are
1113 // identical:
1114 SDValue Op0 = i64vec.getOperand(0);
1115 ReplaceUses(i64vec, Op0);
1116
1117 return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT,
1118 SDValue(emitBuildVector(Op0), 0));
1119 } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
1120 SDValue lhs = i64vec.getOperand(0);
1121 SDValue rhs = i64vec.getOperand(1);
1122 SDValue shufmask = i64vec.getOperand(2);
1123
1124 if (lhs.getOpcode() == ISD::BIT_CONVERT) {
1125 ReplaceUses(lhs, lhs.getOperand(0));
1126 lhs = lhs.getOperand(0);
1127 }
1128
1129 SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
1130 ? lhs.getNode()
1131 : emitBuildVector(lhs));
1132
1133 if (rhs.getOpcode() == ISD::BIT_CONVERT) {
1134 ReplaceUses(rhs, rhs.getOperand(0));
1135 rhs = rhs.getOperand(0);
1136 }
1137
1138 SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
1139 ? rhs.getNode()
1140 : emitBuildVector(rhs));
1141
1142 if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
1143 ReplaceUses(shufmask, shufmask.getOperand(0));
1144 shufmask = shufmask.getOperand(0);
1145 }
1146
1147 SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
1148 ? shufmask.getNode()
1149 : emitBuildVector(shufmask));
1150
1151 SDNode *shufNode =
1152 Select(CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
1153 SDValue(lhsNode, 0), SDValue(rhsNode, 0),
1154 SDValue(shufMaskNode, 0)));
1155
1156 return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(shufNode, 0));
1157 } else {
1158 cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
1159 abort();
1160 }
1161 }
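SelectI64Constant thus only has to parse the two shapes LowerSplat_v2i64 can hand back: a plain bitcast of a splat when the two 32-bit halves of the constant match, or a SHUFB combining two v4i32 splats otherwise. A tiny standalone classifier mirroring that split (the enum name is hypothetical):

#include <cstdint>

enum class I64LoadShape { SplatBitcast, Shufb };

// Equal 32-bit halves -> one v4i32 splat (the BIT_CONVERT case);
// otherwise two splats merged through SHUFB.
static I64LoadShape classifyI64Constant(uint64_t v) {
  uint32_t upper = static_cast<uint32_t>(v >> 32);
  uint32_t lower = static_cast<uint32_t>(v);
  return (upper == lower) ? I64LoadShape::SplatBitcast : I64LoadShape::Shufb;
}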
1162
10481163 /// createSPUISelDag - This pass converts a legalized DAG into a
10491164 /// SPU-specific DAG, ready for instruction scheduling.
10501165 ///
1616 #include "SPUFrameInfo.h"
1717 #include "llvm/ADT/APInt.h"
1818 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CallingConv.h"
1920 #include "llvm/CodeGen/CallingConvLower.h"
2021 #include "llvm/CodeGen/MachineFrameInfo.h"
2122 #include "llvm/CodeGen/MachineFunction.h"
7879 return retval;
7980 }
8081
82 //! Expand a library call into an actual call DAG node
83 /*!
84 \note
85 This code is taken from SelectionDAGLegalize, since it is not exposed as
86 part of the LLVM SelectionDAG API.
87 */
88
89 SDValue
90 ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
91 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
92 // The input chain to this libcall is the entry node of the function.
93 // Legalizing the call will automatically add the previous call to the
94 // dependence.
95 SDValue InChain = DAG.getEntryNode();
96
97 TargetLowering::ArgListTy Args;
98 TargetLowering::ArgListEntry Entry;
99 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
100 MVT ArgVT = Op.getOperand(i).getValueType();
101 const Type *ArgTy = ArgVT.getTypeForMVT();
102 Entry.Node = Op.getOperand(i);
103 Entry.Ty = ArgTy;
104 Entry.isSExt = isSigned;
105 Entry.isZExt = !isSigned;
106 Args.push_back(Entry);
107 }
108 SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
109 TLI.getPointerTy());
110
111 // Splice the libcall in wherever FindInputOutputChains tells us to.
112 const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
113 std::pair<SDValue, SDValue> CallInfo =
114 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
115 CallingConv::C, false, Callee, Args, DAG);
116
117 return CallInfo.first;
118 }
81119 }
82120
83121 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
112150 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
113151
114152 // SPU constant load actions are custom lowered:
115 setOperationAction(ISD::Constant, MVT::i64, Custom);
116153 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
117154 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
118155
126163 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
127164 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
128165 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
129
130 // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
131 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
132 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
133166
134167 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
135168 MVT StoreVT = (MVT::SimpleValueType) stype;
178211 setOperationAction(ISD::FCOS , MVT::f32, Expand);
179212 setOperationAction(ISD::FREM , MVT::f32, Expand);
180213
181 // If we're enabling GP optimizations, use hardware square root
214 // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
215 // for f32!)
182216 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
183217 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
184218
185219 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
186220 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
187
188 // Make sure that DAGCombine doesn't insert illegal 64-bit constants
189 setOperationAction(ISD::FABS, MVT::f64, Custom);
190221
191222 // SPU can do rotate right and left, so legalize it... but customize for i8
192223 // because instructions don't exist.
253284 // Custom lower i128 -> i64 truncates
254285 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
255286
256 // SPU has a legal FP -> signed INT instruction
257 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
258 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
259 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
260 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
287 // SPU has a legal FP -> signed INT instruction for f32, but f64 needs to
288 // be expanded to a libcall, hence the custom lowering:
289 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
290 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
261291
262292 // FDIV on SPU requires custom lowering
263 setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
293 setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
264294
265295 // SPU has [U|S]INT_TO_FP
266 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
296 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
267297 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
268 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
269 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
298 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
299 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
270300 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
271 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
301 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
272302 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
273303 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
274304
337367 MVT VT = (MVT::SimpleValueType)i;
338368
339369 // add/sub are legal for all supported vector VT's.
340 setOperationAction(ISD::ADD , VT, Legal);
341 setOperationAction(ISD::SUB , VT, Legal);
370 setOperationAction(ISD::ADD, VT, Legal);
371 setOperationAction(ISD::SUB, VT, Legal);
342372 // mul has to be custom lowered.
343 // TODO: v2i64 vector multiply
344 setOperationAction(ISD::MUL , VT, Legal);
345
346 setOperationAction(ISD::AND , VT, Legal);
347 setOperationAction(ISD::OR , VT, Legal);
348 setOperationAction(ISD::XOR , VT, Legal);
349 setOperationAction(ISD::LOAD , VT, Legal);
350 setOperationAction(ISD::SELECT, VT, Legal);
351 setOperationAction(ISD::STORE, VT, Legal);
373 setOperationAction(ISD::MUL, VT, Legal);
374
375 setOperationAction(ISD::AND, VT, Legal);
376 setOperationAction(ISD::OR, VT, Legal);
377 setOperationAction(ISD::XOR, VT, Legal);
378 setOperationAction(ISD::LOAD, VT, Legal);
379 setOperationAction(ISD::SELECT, VT, Legal);
380 setOperationAction(ISD::STORE, VT, Legal);
352381
353382 // These operations need to be expanded:
354 setOperationAction(ISD::SDIV, VT, Expand);
355 setOperationAction(ISD::SREM, VT, Expand);
356 setOperationAction(ISD::UDIV, VT, Expand);
357 setOperationAction(ISD::UREM, VT, Expand);
383 setOperationAction(ISD::SDIV, VT, Expand);
384 setOperationAction(ISD::SREM, VT, Expand);
385 setOperationAction(ISD::UDIV, VT, Expand);
386 setOperationAction(ISD::UREM, VT, Expand);
358387
359388 // Custom lower build_vector, constant pool spills, insert and
360389 // extract vector elements:
858887 } else {
859888 cerr << "LowerGlobalAddress: Relocation model other than static not "
860889 << "supported.\n";
861 abort();
862 /*NOTREACHED*/
863 }
864
865 return SDValue();
866 }
867
868 //! Custom lower i64 integer constants
869 /*!
870 This code inserts all of the necessary juggling that needs to occur to load
871 a 64-bit constant into a register.
872 */
873 static SDValue
874 LowerConstant(SDValue Op, SelectionDAG &DAG) {
875 MVT VT = Op.getValueType();
876
877 if (VT == MVT::i64) {
878 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
879 SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
880 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
881 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
882 } else {
883 cerr << "LowerConstant: unhandled constant type "
884 << VT.getMVTString()
885 << "\n";
886890 abort();
887891 /*NOTREACHED*/
888892 }
15631567
15641568 //! Lower a BUILD_VECTOR instruction creatively:
15651569 SDValue
1566 SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1570 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
15671571 MVT VT = Op.getValueType();
15681572 // If this is a vector of constants or undefs, get the bits. A bit in
15691573 // UndefBits is set if the corresponding element of the vector is an
15871591 abort();
15881592 /*NOTREACHED*/
15891593 case MVT::v4f32: {
1590 uint32_t Value32 = SplatBits;
1594 uint32_t Value32 = uint32_t(SplatBits);
15911595 assert(SplatSize == 4
15921596 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
15931597 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
15971601 break;
15981602 }
15991603 case MVT::v2f64: {
1600 uint64_t f64val = SplatBits;
1604 uint64_t f64val = uint64_t(SplatBits);
16011605 assert(SplatSize == 8
16021606 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
16031607 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
16371641 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
16381642 }
16391643 case MVT::v2i64: {
1640 uint64_t val = SplatBits;
1641 uint32_t upper = uint32_t(val >> 32);
1642 uint32_t lower = uint32_t(val);
1643
1644 if (upper == lower) {
1645 // Magic constant that can be matched by IL, ILA, et al.
1646 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1647 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1648 } else {
1649 SDValue LO32;
1650 SDValue HI32;
1651 SmallVector<SDValue, 16> ShufBytes;
1652 SDValue Result;
1653 bool upper_special, lower_special;
1654
1655 // NOTE: This code creates common-case shuffle masks that can be easily
1656 // detected as common expressions. It is not attempting to create highly
1657 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1658
1659 // Detect if the upper or lower half is a special shuffle mask pattern:
1660 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1661 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1662
1663 // Create lower vector if not a special pattern
1664 if (!lower_special) {
1665 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1666 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1667 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1668 LO32C, LO32C, LO32C, LO32C));
1644 return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
1645 }
1646 }
1647
1648 return SDValue();
1649 }
1650
1651 SDValue
1652 SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
1653 uint32_t upper = uint32_t(SplatVal >> 32);
1654 uint32_t lower = uint32_t(SplatVal);
1655
1656 if (upper == lower) {
1657 // Magic constant that can be matched by IL, ILA, et al.
1658 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1659 return DAG.getNode(ISD::BIT_CONVERT, OpVT,
1660 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1661 Val, Val, Val, Val));
1662 } else {
1663 SDValue LO32;
1664 SDValue HI32;
1665 SmallVector<SDValue, 16> ShufBytes;
1666 SDValue Result;
1667 bool upper_special, lower_special;
1668
1669 // NOTE: This code creates common-case shuffle masks that can be easily
1670 // detected as common expressions. It is not attempting to create highly
1671 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1672
1673 // Detect if the upper or lower half is a special shuffle mask pattern:
1674 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1675 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1676
1677 // Create lower vector if not a special pattern
1678 if (!lower_special) {
1679 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1680 LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1681 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1682 LO32C, LO32C, LO32C, LO32C));
1683 }
1684
1685 // Create upper vector if not a special pattern
1686 if (!upper_special) {
1687 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1688 HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1689 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1690 HI32C, HI32C, HI32C, HI32C));
1691 }
1692
1693 // If either upper or lower are special, then the two input operands are
1694 // the same (basically, one of them is a "don't care")
1695 if (lower_special)
1696 LO32 = HI32;
1697 if (upper_special)
1698 HI32 = LO32;
1699 if (lower_special && upper_special) {
1700 // Unhappy situation... both upper and lower are special, so punt with
1701 // a target constant:
1702 SDValue Zero = DAG.getConstant(0, MVT::i32);
1703 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1704 Zero, Zero);
1705 }
1706
1707 for (int i = 0; i < 4; ++i) {
1708 uint64_t val = 0;
1709 for (int j = 0; j < 4; ++j) {
1710 SDValue V;
1711 bool process_upper, process_lower;
1712 val <<= 8;
1713 process_upper = (upper_special && (i & 1) == 0);
1714 process_lower = (lower_special && (i & 1) == 1);
1715
1716 if (process_upper || process_lower) {
1717 if ((process_upper && upper == 0)
1718 || (process_lower && lower == 0))
1719 val |= 0x80;
1720 else if ((process_upper && upper == 0xffffffff)
1721 || (process_lower && lower == 0xffffffff))
1722 val |= 0xc0;
1723 else if ((process_upper && upper == 0x80000000)
1724 || (process_lower && lower == 0x80000000))
1725 val |= (j == 0 ? 0xe0 : 0x80);
1726 } else
1727 val |= i * 4 + j + ((i & 1) * 16);
16691728 }
16701729
1671 // Create upper vector if not a special pattern
1672 if (!upper_special) {
1673 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1674 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1675 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1676 HI32C, HI32C, HI32C, HI32C));
1677 }
1678
1679 // If either upper or lower are special, then the two input operands are
1680 // the same (basically, one of them is a "don't care")
1681 if (lower_special)
1682 LO32 = HI32;
1683 if (upper_special)
1684 HI32 = LO32;
1685 if (lower_special && upper_special) {
1686 // Unhappy situation... both upper and lower are special, so punt with
1687 // a target constant:
1688 SDValue Zero = DAG.getConstant(0, MVT::i32);
1689 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1690 Zero, Zero);
1691 }
1692
1693 for (int i = 0; i < 4; ++i) {
1694 uint64_t val = 0;
1695 for (int j = 0; j < 4; ++j) {
1696 SDValue V;
1697 bool process_upper, process_lower;
1698 val <<= 8;
1699 process_upper = (upper_special && (i & 1) == 0);
1700 process_lower = (lower_special && (i & 1) == 1);
1701
1702 if (process_upper || process_lower) {
1703 if ((process_upper && upper == 0)
1704 || (process_lower && lower == 0))
1705 val |= 0x80;
1706 else if ((process_upper && upper == 0xffffffff)
1707 || (process_lower && lower == 0xffffffff))
1708 val |= 0xc0;
1709 else if ((process_upper && upper == 0x80000000)
1710 || (process_lower && lower == 0x80000000))
1711 val |= (j == 0 ? 0xe0 : 0x80);
1712 } else
1713 val |= i * 4 + j + ((i & 1) * 16);
1714 }
1715
1716 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1717 }
1718
1719 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1720 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1721 &ShufBytes[0], ShufBytes.size()));
1722 }
1723 }
1724 }
1725
1726 return SDValue();
1730 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1731 }
1732
1733 return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
1734 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1735 &ShufBytes[0], ShufBytes.size()));
1736 }
17271737 }
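To make the shuffle-mask construction concrete, here is a standalone C++ rendering of the ShufBytes loop above, run on one worked example: upper = 0 (a "special" pattern) and lower = 0x12345678 (not special). The example constant and the printout are illustrative only:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t upper = 0x00000000, lower = 0x12345678;
  bool upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
  bool lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

  for (int i = 0; i < 4; ++i) {            // one 32-bit mask word per iteration
    uint64_t val = 0;
    for (int j = 0; j < 4; ++j) {
      val <<= 8;
      bool process_upper = (upper_special && (i & 1) == 0);
      bool process_lower = (lower_special && (i & 1) == 1);
      if (process_upper || process_lower) {
        if ((process_upper && upper == 0) || (process_lower && lower == 0))
          val |= 0x80;                     // SHUFB byte code for "emit 0x00"
        else if ((process_upper && upper == 0xffffffff)
                 || (process_lower && lower == 0xffffffff))
          val |= 0xc0;                     // SHUFB byte code for "emit 0xff"
        else
          val |= (j == 0 ? 0xe0 : 0x80);   // 0x80000000: 0x80 byte, then zeros
      } else {
        val |= i * 4 + j + ((i & 1) * 16); // select from HI32 (op A) / LO32 (op B)
      }
    }
    printf("mask word %d = 0x%08llx\n", i, (unsigned long long) val);
  }
  // Prints 0x80808080, 0x14151617, 0x80808080, 0x1c1d1e1f: even words are
  // zero-filled (upper == 0), odd words gather the 0x12345678 splat from the
  // second SHUFB operand.
  return 0;
}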
17281738
17291739 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
23832393 return SDValue();
23842394 }
23852395
2386 //! Lower ISD::FABS
2396 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
23872397 /*!
2388 DAGCombine does the same basic reduction: convert the double to i64 and mask
2389 off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which
2390 CellSPU has to legalize. Hence, the custom lowering.
2398 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2399 All conversions to i64 are expanded to a libcall.
23912400 */
2392
2393 static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
2401 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2402 SPUTargetLowering &TLI) {
23942403 MVT OpVT = Op.getValueType();
2395 MVT IntVT(MVT::i64);
23962404 SDValue Op0 = Op.getOperand(0);
2397
2398 assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n");
2399
2400 SDValue iABS =
2401 DAG.getNode(ISD::AND, IntVT,
2402 DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0),
2403 DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT));
2404
2405 return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS);
2405 MVT Op0VT = Op0.getValueType();
2406
2407 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2408 || OpVT == MVT::i64) {
2409 // Convert f32 / f64 to i32 / i64 via libcall.
2410 RTLIB::Libcall LC =
2411 (Op.getOpcode() == ISD::FP_TO_SINT)
2412 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2413 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2414 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2415 SDValue Dummy;
2416 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2417 }
2418
2419 return SDValue();
2420 }
2421
2422 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2423 /*!
2424 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2425 All conversions from i64 are expanded to a libcall.
2426 */
2427 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2428 SPUTargetLowering &TLI) {
2429 MVT OpVT = Op.getValueType();
2430 SDValue Op0 = Op.getOperand(0);
2431 MVT Op0VT = Op0.getValueType();
2432
2433 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2434 || Op0VT == MVT::i64) {
2435 // Convert i32, i64 to f64 via libcall:
2436 RTLIB::Libcall LC =
2437 (Op.getOpcode() == ISD::SINT_TO_FP)
2438 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2439 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2440 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2441 SDValue Dummy;
2442 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2443 }
2444
2445 return SDValue();
24062446 }
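For reference, the RTLIB entries selected in the two routines above resolve to the standard libgcc conversion helpers; the names below are the assumed defaults, not something this patch defines:

// Assumed default libcall names (libgcc conventions):
//   FP_TO_SINT:  f64 -> i32  __fixdfsi      f64 -> i64  __fixdfdi
//   FP_TO_UINT:  f64 -> i32  __fixunsdfsi   f64 -> i64  __fixunsdfdi
//   SINT_TO_FP:  i32 -> f64  __floatsidf    i64 -> f64  __floatdidf
//   UINT_TO_FP:  i32 -> f64  __floatunsidf  i64 -> f64  __floatundidf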
24072447
24082448 //! Lower ISD::SETCC
24092449 /*!
24102450 This handles MVT::f64 (double floating point) condition lowering
24112451 */
2412
24132452 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
24142453 const TargetLowering &TLI) {
2454 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2455 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2456
24152457 SDValue lhs = Op.getOperand(0);
24162458 SDValue rhs = Op.getOperand(1);
2417 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
24182459 MVT lhsVT = lhs.getValueType();
2419 SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64);
2420
2421 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
24222460 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2461
2462 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2463 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2464 MVT IntVT(MVT::i64);
2465
2466 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2467 // selected to a NOP:
2468 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, lhs);
2469 SDValue lhsHi32 =
2470 DAG.getNode(ISD::TRUNCATE, MVT::i32,
2471 DAG.getNode(ISD::SRL, IntVT,
2472 i64lhs, DAG.getConstant(32, MVT::i32)));
2473 SDValue lhsHi32abs =
2474 DAG.getNode(ISD::AND, MVT::i32,
2475 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2476 SDValue lhsLo32 =
2477 DAG.getNode(ISD::TRUNCATE, MVT::i32, i64lhs);
2478
2479 // SETO and SETUO only use the lhs operand:
2480 if (CC->get() == ISD::SETO) {
2481 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2482 // SETUO
2483 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2484 return DAG.getNode(ISD::XOR, ccResultVT,
2485 DAG.getSetCC(ccResultVT,
2486 lhs, DAG.getConstantFP(0.0, lhsVT),
2487 ISD::SETUO),
2488 DAG.getConstant(ccResultAllOnes, ccResultVT));
2489 } else if (CC->get() == ISD::SETUO) {
2490 // Evaluates to true if Op0 is [SQ]NaN
2491 return DAG.getNode(ISD::AND, ccResultVT,
2492 DAG.getSetCC(ccResultVT,
2493 lhsHi32abs,
2494 DAG.getConstant(0x7ff00000, MVT::i32),
2495 ISD::SETGE),
2496 DAG.getSetCC(ccResultVT,
2497 lhsLo32,
2498 DAG.getConstant(0, MVT::i32),
2499 ISD::SETGT));
2500 }
2501
2502 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs);
2503 SDValue rhsHi32 =
2504 DAG.getNode(ISD::TRUNCATE, MVT::i32,
2505 DAG.getNode(ISD::SRL, IntVT,
2506 i64rhs, DAG.getConstant(32, MVT::i32)));
2507
2508 // If a value is negative, subtract from the sign magnitude constant:
2509 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2510
2511 // Convert the sign-magnitude representation into 2's complement:
2512 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
2513 lhsHi32, DAG.getConstant(31, MVT::i32));
2514 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64lhs);
2515 SDValue lhsSelect =
2516 DAG.getNode(ISD::SELECT, IntVT,
2517 lhsSelectMask, lhsSignMag2TC, i64lhs);
2518
2519 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
2520 rhsHi32, DAG.getConstant(31, MVT::i32));
2521 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64rhs);
2522 SDValue rhsSelect =
2523 DAG.getNode(ISD::SELECT, IntVT,
2524 rhsSelectMask, rhsSignMag2TC, i64rhs);
2525
2526 unsigned compareOp;
24232527
24242528 switch (CC->get()) {
24252529 case ISD::SETOEQ:
2530 case ISD::SETUEQ:
2531 compareOp = ISD::SETEQ; break;
24262532 case ISD::SETOGT:
2533 case ISD::SETUGT:
2534 compareOp = ISD::SETGT; break;
24272535 case ISD::SETOGE:
2536 case ISD::SETUGE:
2537 compareOp = ISD::SETGE; break;
24282538 case ISD::SETOLT:
2539 case ISD::SETULT:
2540 compareOp = ISD::SETLT; break;
24292541 case ISD::SETOLE:
2542 case ISD::SETULE:
2543 compareOp = ISD::SETLE; break;
2544 case ISD::SETUNE:
24302545 case ISD::SETONE:
2431 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2432 abort();
2433 break;
2434 case ISD::SETO: {
2435 SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
2436 SDValue i64lhs =
2437 DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
2438
2439 return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT);
2440 }
2441 case ISD::SETUO: {
2442 SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
2443 SDValue i64lhs =
2444 DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
2445
2446 return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE);
2447 }
2448 case ISD::SETUEQ:
2449 case ISD::SETUGT:
2450 case ISD::SETUGE:
2451 case ISD::SETULT:
2452 case ISD::SETULE:
2453 case ISD::SETUNE:
2546 compareOp = ISD::SETNE; break;
24542547 default:
24552548 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
24562549 abort();
24572550 break;
24582551 }
24592552
2460 return SDValue();
2553 SDValue result =
2554 DAG.getSetCC(ccResultVT, lhsSelect, rhsSelect, (ISD::CondCode) compareOp);
2555
2556 if ((CC->get() & 0x8) == 0) {
2557 // Ordered comparison:
2558 SDValue lhsNaN = DAG.getSetCC(ccResultVT,
2559 lhs, DAG.getConstantFP(0.0, MVT::f64),
2560 ISD::SETO);
2561 SDValue rhsNaN = DAG.getSetCC(ccResultVT,
2562 rhs, DAG.getConstantFP(0.0, MVT::f64),
2563 ISD::SETO);
2564 SDValue ordered = DAG.getNode(ISD::AND, ccResultVT, lhsNaN, rhsNaN);
2565
2566 result = DAG.getNode(ISD::AND, ccResultVT, ordered, result);
2567 }
2568
2569 return result;
24612570 }
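The ordered-compare trick above is easier to see in scalar form: reinterpret each double as an i64, map negative values from sign-magnitude to two's complement by subtracting them from 0x8000000000000000, and an ordinary signed integer compare then orders the doubles. A standalone sketch under that reading (NaN handling omitted; the DAG code layers the SETO/SETUO logic separately):

#include <cstdint>
#include <cstring>

// Sign-magnitude -> two's complement key, as in LowerSETCC above.
static int64_t orderedKey(double d) {
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));     // models BIT_CONVERT f64 -> i64
  if (bits >> 63)                           // negative: sign bit set
    bits = 0x8000000000000000ULL - bits;    // subtract from signMag2TC
  return static_cast<int64_t>(bits);
}

static bool lessThanOrdered(double a, double b) {  // models SETOLT on non-NaNs
  return orderedKey(a) < orderedKey(b);
}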
24622571
24632572 //! Lower ISD::SELECT_CC
25652674 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
25662675 case ISD::JumpTable:
25672676 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2568 case ISD::Constant:
2569 return LowerConstant(Op, DAG);
25702677 case ISD::ConstantFP:
25712678 return LowerConstantFP(Op, DAG);
25722679 case ISD::FORMAL_ARGUMENTS:
25892696 break;
25902697 }
25912698
2592 case ISD::FABS:
2593 return LowerFABS(Op, DAG);
2699 case ISD::FP_TO_SINT:
2700 case ISD::FP_TO_UINT:
2701 return LowerFP_TO_INT(Op, DAG, *this);
2702
2703 case ISD::SINT_TO_FP:
2704 case ISD::UINT_TO_FP:
2705 return LowerINT_TO_FP(Op, DAG, *this);
25942706
25952707 // Vector-related lowering.
25962708 case ISD::BUILD_VECTOR:
2597 return SPU::LowerBUILD_VECTOR(Op, DAG);
2709 return LowerBUILD_VECTOR(Op, DAG);
25982710 case ISD::SCALAR_TO_VECTOR:
25992711 return LowerSCALAR_TO_VECTOR(Op, DAG);
26002712 case ISD::VECTOR_SHUFFLE:
6060 };
6161 }
6262
63 //! Utility functions specific to CellSPU-only:
63 //! Utility functions specific to CellSPU:
6464 namespace SPU {
6565 SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
6666 MVT ValueType);
7777
7878 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
7979 const SPUTargetMachine &TM);
80 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
80 SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat);
8181
8282 SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
8383 SDValue getCarryGenerateShufMask(SelectionDAG &DAG);
154154 case SPU::ORr8_r32:
155155 case SPU::ORr32_r16:
156156 case SPU::ORr32_r8:
157 case SPU::ORr32_r64:
158157 case SPU::ORr16_r64:
159158 case SPU::ORr8_r64:
160 case SPU::ORr64_r32:
161159 case SPU::ORr64_r16:
162160 case SPU::ORr64_r8:
163161 */
162 case SPU::ORr64_r32:
163 case SPU::ORr32_r64:
164164 case SPU::ORf32_r32:
165165 case SPU::ORr32_f32:
166166 case SPU::ORf64_r64:
12581258 def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
12591259 [/* Intentionally does not match a pattern */]>;
12601260
1261 def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
1262 [/* Intentionally does not match a pattern */]>;
1263
12611264 // Could use v4i32, but won't for clarity
12621265 def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
12631266 [/* Intentionally does not match a pattern */]>;
15241527 // Conversion from R32C to register
15251528 def r32_r16: ORCvtFormR32Reg<R16C>;
15261529 def r32_r8: ORCvtFormR32Reg<R8C>;
1530 */
15271531
1528 // Conversion from register to R64C:
1532 // Conversion to register from R64C:
15291533 def r32_r64: ORCvtFormR64Reg<R32C>;
1530 def r16_r64: ORCvtFormR64Reg<R16C>;
1531 def r8_r64: ORCvtFormR64Reg<R8C>;
1534 // def r16_r64: ORCvtFormR64Reg<R16C>;
1535 // def r8_r64: ORCvtFormR64Reg<R8C>;
15321536
1533 // Conversion from R64C to register
1537 // Conversion to R64C from register
15341538 def r64_r32: ORCvtFormRegR64<R32C>;
1535 def r64_r16: ORCvtFormRegR64<R16C>;
1536 def r64_r8: ORCvtFormRegR64<R8C>;
1537 */
1539 // def r64_r16: ORCvtFormRegR64<R16C>;
1540 // def r64_r8: ORCvtFormRegR64<R8C>;
15381541
15391542 // bitconvert patterns:
15401543 def r32_f32: ORCvtFormR32Reg<R32FP>;
19091912 RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
19101913 IntegerOp, pattern>;
19111914
1912 class SELBVecInst<ValueType vectype>:
1915 class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
19131916 SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
19141917 [(set (vectype VECREG:$rT),
19151918 (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
1916 (and (vnot (vectype VECREG:$rC)),
1919 (and (vnot_frag (vectype VECREG:$rC)),
19171920 (vectype VECREG:$rA))))]>;
19181921
19191922 class SELBVecVCondInst<ValueType vectype>:
19461949 def v16i8: SELBVecInst<v16i8>;
19471950 def v8i16: SELBVecInst<v8i16>;
19481951 def v4i32: SELBVecInst<v4i32>;
1949 def v2i64: SELBVecInst<v2i64>;
1952 def v2i64: SELBVecInst<v2i64, vnot_conv>;
19501953
19511954 def r128: SELBRegInst<GPRC>;
19521955 def r64: SELBRegInst<R64C>;
43204323 (ANDfabsvec (v4f32 VECREG:$rA),
43214324 (v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
43224325
4326 def : Pat<(fabs R64FP:$rA),
4327 (ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
4328
4329 def : Pat<(fabs (v2f64 VECREG:$rA)),
4330 (ANDfabsvec (v2f64 VECREG:$rA),
4331 (v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
4332
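The fabs patterns just added implement |x| as a bitwise AND that clears sign bits, with the mask materialized via FSMBI/ANDBI rather than a literal constant. A simplified scalar model of the idea (a sketch of the masking principle, not the exact SPU mask):

#include <cstdint>
#include <cstring>

// fabs by masking: clear the IEEE-754 sign bit with an AND.
static double fabsByMask(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= 0x7fffffffffffffffULL;   // drop the sign bit
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}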
43234333 //===----------------------------------------------------------------------===//
43244334 // Hint for branch instructions:
43254335 //===----------------------------------------------------------------------===//
test/CodeGen/CellSPU/fcmp.ll  +0 -22  (deleted; renamed to fcmp32.ll)
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep fceq %t1.s | count 1
2 ; RUN: grep fcmeq %t1.s | count 1
3 ;
4 ; This file includes standard floating point arithmetic instructions
5 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
6 target triple = "spu"
7
8 declare double @fabs(double)
9 declare float @fabsf(float)
10
11 define i1 @fcmp_eq(float %arg1, float %arg2) {
12 %A = fcmp oeq float %arg1, %arg2 ; [#uses=1]
13 ret i1 %A
14 }
15
16 define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
17 %A = call float @fabsf(float %arg1) ; [#uses=1]
18 %B = call float @fabsf(float %arg2) ; [#uses=1]
19 %C = fcmp oeq float %A, %B ; [#uses=1]
20 ret i1 %C
21 }
test/CodeGen/CellSPU/fcmp32.ll (renamed from fcmp.ll)
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep fceq %t1.s | count 1
2 ; RUN: grep fcmeq %t1.s | count 1
3
4 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
5 target triple = "spu"
6
7 ; Exercise the floating point comparison operators for f32:
8
9 declare double @fabs(double)
10 declare float @fabsf(float)
11
12 define i1 @fcmp_eq(float %arg1, float %arg2) {
13 %A = fcmp oeq float %arg1, %arg2
14 ret i1 %A
15 }
16
17 define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
18 %1 = call float @fabsf(float %arg1)
19 %2 = call float @fabsf(float %arg2)
20 %3 = fcmp oeq float %1, %2
21 ret i1 %3
22 }
test/CodeGen/CellSPU/fcmp64.ll (new)
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1
2 define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind {
3 entry:
4 %A = fcmp oeq double %arg1, %arg2
5 ret i1 %A
6 }
test/CodeGen/CellSPU/fneg-fabs.ll
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep fsmbi %t1.s | count 2
1 ; RUN: grep fsmbi %t1.s | count 3
22 ; RUN: grep 32768 %t1.s | count 2
33 ; RUN: grep xor %t1.s | count 4
4 ; RUN: grep and %t1.s | count 4
5 ; RUN: grep andbi %t1.s | count 2
4 ; RUN: grep and %t1.s | count 5
5 ; RUN: grep andbi %t1.s | count 3
6
67 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
78 target triple = "spu"
89
3233 declare float @fabsf(float)
3334
3435 define double @fabs_dp(double %X) {
35 %Y = call double @fabs( double %X ) ; [#uses=1]
36 %Y = call double @fabs( double %X )
3637 ret double %Y
3738 }
3839
3940 define float @fabs_sp(float %X) {
40 %Y = call float @fabsf( float %X ) ; [#uses=1]
41 %Y = call float @fabsf( float %X )
4142 ret float %Y
4243 }
test/CodeGen/CellSPU/select_bits.ll
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep selb %t1.s | count 280
1 ; RUN: grep selb %t1.s | count 56
22
33 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
44 target triple = "spu"
88 ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
99
1010 ; (or (and rC, rB), (and (not rC), rA))
11 define <2 x i64> @selb_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
11 define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
1212 %C = and <2 x i64> %rC, %rB
1313 %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
1414 %B = and <2 x i64> %A, %rA
1717 }
1818
1919 ; (or (and rB, rC), (and (not rC), rA))
20 define <2 x i64> @selb_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
20 define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
2121 %C = and <2 x i64> %rB, %rC
2222 %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
2323 %B = and <2 x i64> %A, %rA
2626 }
2727
2828 ; (or (and (not rC), rA), (and rB, rC))
29 define <2 x i64> @selb_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
29 define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
3030 %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
3131 %B = and <2 x i64> %A, %rA
3232 %C = and <2 x i64> %rB, %rC
3535 }
3636
3737 ; (or (and (not rC), rA), (and rC, rB))
38 define <2 x i64> @selb_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
38 define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
3939 %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
4040 %B = and <2 x i64> %A, %rA
4141 %C = and <2 x i64> %rC, %rB
4444 }
4545
4646 ; (or (and rC, rB), (and rA, (not rC)))
47 define <2 x i64> @selb_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
47 define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
4848 %C = and <2 x i64> %rC, %rB
4949 %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
5050 %B = and <2 x i64> %rA, %A
5353 }
5454
5555 ; (or (and rB, rC), (and rA, (not rC)))
56 define <2 x i64> @selb_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
56 define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
5757 %C = and <2 x i64> %rB, %rC
5858 %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
5959 %B = and <2 x i64> %rA, %A
6262 }
6363
6464 ; (or (and rA, (not rC)), (and rB, rC))
65 define <2 x i64> @selb_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
65 define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
6666 %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
6767 %B = and <2 x i64> %rA, %A
6868 %C = and <2 x i64> %rB, %rC
7171 }
7272
7373 ; (or (and rA, (not rC)), (and rC, rB))
74 define <2 x i64> @selb_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
74 define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
7575 %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
7676 %B = and <2 x i64> %rA, %A
7777 %C = and <2 x i64> %rC, %rB
8484 ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
8585
8686 ; (or (and rC, rB), (and (not rC), rA))
87 define <4 x i32> @selb_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
87 define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
8888 %C = and <4 x i32> %rC, %rB
8989 %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
9090 %B = and <4 x i32> %A, %rA
9393 }
9494
9595 ; (or (and rB, rC), (and (not rC), rA))
96 define <4 x i32> @selb_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
96 define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
9797 %C = and <4 x i32> %rB, %rC
9898 %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
9999 %B = and <4 x i32> %A, %rA
102102 }
103103
104104 ; (or (and (not rC), rA), (and rB, rC))
105 define <4 x i32> @selb_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
105 define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
106106 %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
107107 %B = and <4 x i32> %A, %rA
108108 %C = and <4 x i32> %rB, %rC
111111 }
112112
113113 ; (or (and (not rC), rA), (and rC, rB))
114 define <4 x i32> @selb_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
114 define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
115115 %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
116116 %B = and <4 x i32> %A, %rA
117117 %C = and <4 x i32> %rC, %rB
120120 }
121121
122122 ; (or (and rC, rB), (and rA, (not rC)))
123 define <4 x i32> @selb_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
123 define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
124124 %C = and <4 x i32> %rC, %rB
125125 %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
126126 %B = and <4 x i32> %rA, %A
129129 }
130130
131131 ; (or (and rB, rC), (and rA, (not rC)))
132 define <4 x i32> @selb_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
132 define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
133133 %C = and <4 x i32> %rB, %rC
134134 %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
135135 %B = and <4 x i32> %rA, %A
138138 }
139139
140140 ; (or (and rA, (not rC)), (and rB, rC))
141 define <4 x i32> @selb_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
141 define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
142142 %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
143143 %B = and <4 x i32> %rA, %A
144144 %C = and <4 x i32> %rB, %rC
147147 }
148148
149149 ; (or (and rA, (not rC)), (and rC, rB))
150 define <4 x i32> @selb_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
150 define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
151151 %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
152152 %B = and <4 x i32> %rA, %A
153153 %C = and <4 x i32> %rC, %rB
160160 ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
161161
162162 ; (or (and rC, rB), (and (not rC), rA))
163 define <8 x i16> @selb_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
163 define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
164164 %C = and <8 x i16> %rC, %rB
165165 %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
166166 i16 -1, i16 -1, i16 -1, i16 -1 >
170170 }
171171
172172 ; (or (and rB, rC), (and (not rC), rA))
173 define <8 x i16> @selb_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
173 define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
174174 %C = and <8 x i16> %rB, %rC
175175 %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
176176 i16 -1, i16 -1, i16 -1, i16 -1 >
180180 }
181181
182182 ; (or (and (not rC), rA), (and rB, rC))
183 define <8 x i16> @selb_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
183 define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
184184 %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
185185 i16 -1, i16 -1, i16 -1, i16 -1 >
186186 %B = and <8 x i16> %A, %rA
190190 }
191191
192192 ; (or (and (not rC), rA), (and rC, rB))
193 define <8 x i16> @selb_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
193 define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
194194 %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
195195 i16 -1, i16 -1, i16 -1, i16 -1 >
196196 %B = and <8 x i16> %A, %rA
200200 }
201201
202202 ; (or (and rC, rB), (and rA, (not rC)))
203 define <8 x i16> @selb_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
203 define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
204204 %C = and <8 x i16> %rC, %rB
205205 %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
206206 i16 -1, i16 -1, i16 -1, i16 -1 >
210210 }
211211
212212 ; (or (and rB, rC), (and rA, (not rC)))
213 define <8 x i16> @selb_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
213 define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
214214 %C = and <8 x i16> %rB, %rC
215215 %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
216216 i16 -1, i16 -1, i16 -1, i16 -1 >
220220 }
221221
222222 ; (or (and rA, (not rC)), (and rB, rC))
223 define <8 x i16> @selb_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
223 define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
224224 %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
225225 i16 -1, i16 -1, i16 -1, i16 -1 >
226226 %B = and <8 x i16> %rA, %A
230230 }
231231
232232 ; (or (and rA, (not rC)), (and rC, rB))
233 define <8 x i16> @selb_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
233 define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
234234 %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
235235 i16 -1, i16 -1, i16 -1, i16 -1 >
236236 %B = and <8 x i16> %rA, %A
244244 ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
245245
246246 ; (or (and rC, rB), (and (not rC), rA))
247 define <16 x i8> @selb_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
247 define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
248248 %C = and <16 x i8> %rC, %rB
249249 %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
250250 i8 -1, i8 -1, i8 -1, i8 -1,
256256 }
257257
258258 ; (or (and rB, rC), (and (not rC), rA))
259 define <16 x i8> @selb_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
259 define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
260260 %C = and <16 x i8> %rB, %rC
261261 %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
262262 i8 -1, i8 -1, i8 -1, i8 -1,
268268 }
269269
270270 ; (or (and (not rC), rA), (and rB, rC))
271 define <16 x i8> @selb_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
271 define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
272272 %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
273273 i8 -1, i8 -1, i8 -1, i8 -1,
274274 i8 -1, i8 -1, i8 -1, i8 -1,
280280 }
281281
282282 ; (or (and (not rC), rA), (and rC, rB))
283 define <16 x i8> @selb_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
283 define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
284284 %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
285285 i8 -1, i8 -1, i8 -1, i8 -1,
286286 i8 -1, i8 -1, i8 -1, i8 -1,
292292 }
293293
294294 ; (or (and rC, rB), (and rA, (not rC)))
295 define <16 x i8> @selb_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
295 define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
296296 %C = and <16 x i8> %rC, %rB
297297 %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
298298 i8 -1, i8 -1, i8 -1, i8 -1,
304304 }
305305
306306 ; (or (and rB, rC), (and rA, (not rC)))
307 define <16 x i8> @selb_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
307 define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
308308 %C = and <16 x i8> %rB, %rC
309309 %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
310310 i8 -1, i8 -1, i8 -1, i8 -1,
316316 }
317317
318318 ; (or (and rA, (not rC)), (and rB, rC))
319 define <16 x i8> @selb_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
319 define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
320320 %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
321321 i8 -1, i8 -1, i8 -1, i8 -1,
322322 i8 -1, i8 -1, i8 -1, i8 -1,
328328 }
329329
330330 ; (or (and rA, (not rC)), (and rC, rB))
331 define <16 x i8> @selb_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
331 define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
332332 %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
333333 i8 -1, i8 -1, i8 -1, i8 -1,
334334 i8 -1, i8 -1, i8 -1, i8 -1,
344344 ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
345345
346346 ; (or (and rC, rB), (and (not rC), rA))
347 define i32 @selb_i32_01(i32 %rA, i32 %rB, i32 %rC) {
347 define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) {
348348 %C = and i32 %rC, %rB
349349 %A = xor i32 %rC, -1
350350 %B = and i32 %A, %rA
353353 }
354354
355355 ; (or (and rB, rC), (and (not rC), rA))
356 define i32 @selb_i32_02(i32 %rA, i32 %rB, i32 %rC) {
356 define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) {
357357 %C = and i32 %rB, %rC
358358 %A = xor i32 %rC, -1
359359 %B = and i32 %A, %rA
362362 }
363363
364364 ; (or (and (not rC), rA), (and rB, rC))
365 define i32 @selb_i32_03(i32 %rA, i32 %rB, i32 %rC) {
365 define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) {
366366 %A = xor i32 %rC, -1
367367 %B = and i32 %A, %rA
368368 %C = and i32 %rB, %rC
371371 }
372372
373373 ; (or (and (not rC), rA), (and rC, rB))
374 define i32 @selb_i32_04(i32 %rA, i32 %rB, i32 %rC) {
374 define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) {
375375 %A = xor i32 %rC, -1
376376 %B = and i32 %A, %rA
377377 %C = and i32 %rC, %rB
380380 }
381381
382382 ; (or (and rC, rB), (and rA, (not rC)))
383 define i32 @selb_i32_05(i32 %rA, i32 %rB, i32 %rC) {
383 define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) {
384384 %C = and i32 %rC, %rB
385385 %A = xor i32 %rC, -1
386386 %B = and i32 %rA, %A
389389 }
390390
391391 ; (or (and rB, rC), (and rA, (not rC)))
392 define i32 @selb_i32_06(i32 %rA, i32 %rB, i32 %rC) {
392 define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) {
393393 %C = and i32 %rB, %rC
394394 %A = xor i32 %rC, -1
395395 %B = and i32 %rA, %A
398398 }
399399
400400 ; (or (and rA, (not rC)), (and rB, rC))
401 define i32 @selb_i32_07(i32 %rA, i32 %rB, i32 %rC) {
401 define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) {
402402 %A = xor i32 %rC, -1
403403 %B = and i32 %rA, %A
404404 %C = and i32 %rB, %rC
407407 }
408408
409409 ; (or (and rA, (not rC)), (and rC, rB))
410 define i32 @selb_i32_08(i32 %rA, i32 %rB, i32 %rC) {
410 define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) {
411411 %A = xor i32 %rC, -1
412412 %B = and i32 %rA, %A
413413 %C = and i32 %rC, %rB
420420 ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
421421
422422 ; (or (and rC, rB), (and (not rC), rA))
423 define i16 @selb_i16_01(i16 %rA, i16 %rB, i16 %rC) {
423 define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) {
424424 %C = and i16 %rC, %rB
425425 %A = xor i16 %rC, -1
426426 %B = and i16 %A, %rA
429429 }
430430
431431 ; (or (and rB, rC), (and (not rC), rA))
432 define i16 @selb_i16_02(i16 %rA, i16 %rB, i16 %rC) {
432 define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) {
433433 %C = and i16 %rB, %rC
434434 %A = xor i16 %rC, -1
435435 %B = and i16 %A, %rA
438438 }
439439
440440 ; (or (and (not rC), rA), (and rB, rC))
441 define i16 @selb_i16_03(i16 %rA, i16 %rB, i16 %rC) {
441 define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) {
442442 %A = xor i16 %rC, -1
443443 %B = and i16 %A, %rA
444444 %C = and i16 %rB, %rC
447447 }
448448
449449 ; (or (and (not rC), rA), (and rC, rB))
450 define i16 @selb_i16_04(i16 %rA, i16 %rB, i16 %rC) {
450 define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) {
451451 %A = xor i16 %rC, -1
452452 %B = and i16 %A, %rA
453453 %C = and i16 %rC, %rB
456456 }
457457
458458 ; (or (and rC, rB), (and rA, (not rC)))
459 define i16 @selb_i16_05(i16 %rA, i16 %rB, i16 %rC) {
459 define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) {
460460 %C = and i16 %rC, %rB
461461 %A = xor i16 %rC, -1
462462 %B = and i16 %rA, %A
465465 }
466466
467467 ; (or (and rB, rC), (and rA, (not rC)))
468 define i16 @selb_i16_06(i16 %rA, i16 %rB, i16 %rC) {
468 define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) {
469469 %C = and i16 %rB, %rC
470470 %A = xor i16 %rC, -1
471471 %B = and i16 %rA, %A
474474 }
475475
476476 ; (or (and rA, (not rC)), (and rB, rC))
477 define i16 @selb_i16_07(i16 %rA, i16 %rB, i16 %rC) {
477 define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) {
478478 %A = xor i16 %rC, -1
479479 %B = and i16 %rA, %A
480480 %C = and i16 %rB, %rC
483483 }
484484
485485 ; (or (and rA, (not rC)), (and rC, rB))
486 define i16 @selb_i16_08(i16 %rA, i16 %rB, i16 %rC) {
486 define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) {
487487 %A = xor i16 %rC, -1
488488 %B = and i16 %rA, %A
489489 %C = and i16 %rC, %rB
496496 ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
497497
498498 ; (or (and rC, rB), (and (not rC), rA))
499 define i8 @selb_i8_01(i8 %rA, i8 %rB, i8 %rC) {
499 define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) {
500500 %C = and i8 %rC, %rB
501501 %A = xor i8 %rC, -1
502502 %B = and i8 %A, %rA
505505 }
506506
507507 ; (or (and rB, rC), (and (not rC), rA))
508 define i8 @selb_i8_02(i8 %rA, i8 %rB, i8 %rC) {
508 define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) {
509509 %C = and i8 %rB, %rC
510510 %A = xor i8 %rC, -1
511511 %B = and i8 %A, %rA
514514 }
515515
516516 ; (or (and (not rC), rA), (and rB, rC))
517 define i8 @selb_i8_03(i8 %rA, i8 %rB, i8 %rC) {
517 define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) {
518518 %A = xor i8 %rC, -1
519519 %B = and i8 %A, %rA
520520 %C = and i8 %rB, %rC
523523 }
524524
525525 ; (or (and (not rC), rA), (and rC, rB))
526 define i8 @selb_i8_04(i8 %rA, i8 %rB, i8 %rC) {
526 define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) {
527527 %A = xor i8 %rC, -1
528528 %B = and i8 %A, %rA
529529 %C = and i8 %rC, %rB
532532 }
533533
534534 ; (or (and rC, rB), (and rA, (not rC)))
535 define i8 @selb_i8_05(i8 %rA, i8 %rB, i8 %rC) {
535 define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) {
536536 %C = and i8 %rC, %rB
537537 %A = xor i8 %rC, -1
538538 %B = and i8 %rA, %A
541541 }
542542
543543 ; (or (and rB, rC), (and rA, (not rC)))
544 define i8 @selb_i8_06(i8 %rA, i8 %rB, i8 %rC) {
544 define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) {
545545 %C = and i8 %rB, %rC
546546 %A = xor i8 %rC, -1
547547 %B = and i8 %rA, %A
550550 }
551551
552552 ; (or (and rA, (not rC)), (and rB, rC))
553 define i8 @selb_i8_07(i8 %rA, i8 %rB, i8 %rC) {
553 define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) {
554554 %A = xor i8 %rC, -1
555555 %B = and i8 %rA, %A
556556 %C = and i8 %rB, %rC
559559 }
560560
561561 ; (or (and rA, (not rC)), (and rC, rB))
562 define i8 @selb_i8_08(i8 %rA, i8 %rB, i8 %rC) {
562 define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) {
563563 %A = xor i8 %rC, -1
564564 %B = and i8 %rA, %A
565565 %C = and i8 %rC, %rB
274274 %2 = ashr i64 %arg1, %1
275275 ret i64 %2
276276 }
277
278 define i32 @hi32_i64(i64 %arg) {
279 %1 = lshr i64 %arg, 32
280 %2 = trunc i64 %1 to i32
281 ret i32 %2
282 }
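The hi32_i64 test added above pins down the new TRUNCATE handling in SPUISelDAGToDAG.cpp: a 64-bit right shift by c >= 32 followed by a truncate to i32 reduces to taking the upper word (already in the i32 preferred slot), plus a residual 32-bit shift of c - 32 when needed. A scalar model of the selected sequence:

#include <cstdint>

// Models (truncate:i32 (srl:i64 x, c)) for c >= 32: grab the high word
// (the ORr32_r64 move), then a plain 32-bit shift for the remainder
// (ROTMr32 for srl, ROTMAIr32_i32 for sra).
static uint32_t hi32Shift(uint64_t x, unsigned c) {
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  unsigned rem = c - 32;
  return rem ? (hi >> rem) : hi;
}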