llvm.org GIT mirror llvm / 02d711b
- Start moving target-dependent nodes that could be represented by an instruction sequence and cannot ordinarily be simplified by DAGcombine into the various target description files or SPUDAGToDAGISel.cpp. This makes some 64-bit operations legal. - Eliminate target-dependent ISD enums. - Update tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61508 91177308-0d34-0410-b5e6-96231b3b80d8 Scott Michel 11 years ago
15 changed file(s) with 743 addition(s) and 641 deletion(s). Raw diff Collapse all Expand all
1313 // Get the target-independent interfaces which we are implementing.
1414 //
1515 include "llvm/Target/Target.td"
16
17 // Holder of code fragments (you'd think this'd already be in
18 // a td file somewhere... :-)
19
20 class CodeFrag {
21 dag Fragment = frag;
22 }
1623
1724 //===----------------------------------------------------------------------===//
1825 // Register File Description
0 //====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
1 //
2 // Cell SPU 64-bit operations
3 //
4 // Primary author: Scott Michel (scottm@aero.org)
5 //===----------------------------------------------------------------------===//
6
7 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
18 // 64-bit comparisons:
29 //
310 // 1. The instruction sequences for vector vice scalar differ by a
4 // constant.
11 // constant. In the scalar case, we're only interested in the
12 // top two 32-bit slots, whereas we're interested in an exact
13 // all-four-slot match in the vector case.
514 //
615 // 2. There are no "immediate" forms, since loading 64-bit constants
716 // could be a constant pool load.
918 // 3. i64 setcc results are i32, which are subsequently converted to a FSM
1019 // mask when used in a select pattern.
1120 //
12 // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask
13 // (TODO)
21 // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
22 // [Note: this may be moot, since gb produces v4i32 or r32.]
1423 //
15 // M00$E Kan be Pretty N@sTi!!!!! (appologies to Monty!)
24 // M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
1625 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
1726
1827 // selb instruction definition for i64. Note that the selection mask is
2130 SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
2231 [/* no pattern */]>;
2332
24 class CodeFrag {
25 dag Fragment = frag;
26 }
33 // select the negative condition:
34 class I64SELECTNegCond:
35 Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
36 (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
2737
28 class I64SELECTNegCond:
29 Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
30 (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>;
31
32 class I64SETCCNegCond:
38 // setcc the negative condition:
39 class I64SETCCNegCond:
3340 Pat<(cond R64C:$rA, R64C:$rB),
34 (XORIr32 cmpare.Fragment, -1)>;
41 (XORIr32 compare.Fragment, -1)>;
3542
3643 // The i64 seteq fragment that does the scalar->vector conversion and
3744 // comparison:
6370 defm I64EQ: CompareEqual64;
6471
6572 def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
73 def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
6674
67 def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
68 I64EQv2i64.Fragment>;
75 def : Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
76 (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
6977
70 def I64Select:
71 Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
72 (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
78 // i64 setne:
79 def : I64SETCCNegCond;
80 def : I64SELECTNegCond;
7381
74 def : I64SETCCNegCond;
75
76 def : I64SELECTNegCond;
82 // i64 setugt:
148148 }
149149
150150 bool
151 isHighLow(const SDValue &Op)
151 isHighLow(const SDValue &Op)
152152 {
153153 return (Op.getOpcode() == SPUISD::IndirectAddr
154154 && ((Op.getOperand(0).getOpcode() == SPUISD::Hi
228228 TM(tm),
229229 SPUtli(*tm.getTargetLowering())
230230 {}
231
231
232232 virtual bool runOnFunction(Function &Fn) {
233233 // Make sure we re-emit a set of the global base reg if necessary
234234 GlobalBaseReg = 0;
235235 SelectionDAGISel::runOnFunction(Fn);
236236 return true;
237237 }
238
238
239239 /// getI32Imm - Return a target constant with the specified value, of type
240240 /// i32.
241241 inline SDValue getI32Imm(uint32_t Imm) {
247247 inline SDValue getI64Imm(uint64_t Imm) {
248248 return CurDAG->getTargetConstant(Imm, MVT::i64);
249249 }
250
250
251251 /// getSmallIPtrImm - Return a target constant of pointer type.
252252 inline SDValue getSmallIPtrImm(unsigned Imm) {
253253 return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
256256 /// Select - Convert the specified operand from a target-independent to a
257257 /// target-specific node if it hasn't already been changed.
258258 SDNode *Select(SDValue Op);
259
260 //! Emit the instruction sequence for i64 shl
261 SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
262
263 //! Emit the instruction sequence for i64 srl
264 SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
265
266 //! Emit the instruction sequence for i64 sra
267 SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
259268
260269 //! Returns true if the address N is an A-form (local store) address
261270 bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
286295 switch (ConstraintCode) {
287296 default: return true;
288297 case 'm': // memory
289 if (!SelectDFormAddr(Op, Op, Op0, Op1)
298 if (!SelectDFormAddr(Op, Op, Op0, Op1)
290299 && !SelectAFormAddr(Op, Op, Op0, Op1))
291300 SelectXFormAddr(Op, Op, Op0, Op1);
292301 break;
305314 #endif
306315 break;
307316 }
308
317
309318 OutOps.push_back(Op0);
310319 OutOps.push_back(Op1);
311320 return false;
317326
318327 virtual const char *getPassName() const {
319328 return "Cell SPU DAG->DAG Pattern Instruction Selection";
320 }
321
329 }
330
322331 /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
323332 /// this target when scheduling the DAG.
324333 virtual HazardRecognizer *CreateTargetHazardRecognizer() {
325334 const TargetInstrInfo *II = TM.getInstrInfo();
326335 assert(II && "No InstrInfo?");
327 return new SPUHazardRecognizer(*II);
336 return new SPUHazardRecognizer(*II);
328337 }
329338
330339 // Include the pieces autogenerated from the target description.
374383 abort();
375384 /*NOTREACHED*/
376385
377 case SPUISD::AFormAddr:
386 case SPUISD::AFormAddr:
378387 // Just load from memory if there's only a single use of the location,
379388 // otherwise, this will get handled below with D-form offset addresses
380389 if (N.hasOneUse()) {
403412 return false;
404413 }
405414
406 bool
415 bool
407416 SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
408417 SDValue &Base) {
409418 const int minDForm2Offset = -(1 << 7);
526535 ConstantSDNode *CN = cast(Op0);
527536 offset = int32_t(CN->getSExtValue());
528537 idxOp = Op1;
529 }
538 }
530539
531540 if (offset >= minOffset && offset <= maxOffset) {
532541 Base = CurDAG->getTargetConstant(offset, PtrTy);
621630 if (N->isMachineOpcode()) {
622631 return NULL; // Already selected.
623632 } else if (Opc == ISD::FrameIndex) {
624 // Selects to (add $sp, FI * stackSlotSize)
625 int FI =
626 SPUFrameInfo::FItoStackOffset(cast(N)->getIndex());
627 MVT PtrVT = SPUtli.getPointerTy();
628
629 // Adjust stack slot to actual offset in frame:
630 if (isS10Constant(FI)) {
631 DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AIr32 $sp, "
632 << FI
633 << "\n");
633 int FI = cast(N)->getIndex();
634 SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
635 SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
636
637 if (FI < 128) {
634638 NewOpc = SPU::AIr32;
635 Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT);
636 Ops[1] = CurDAG->getTargetConstant(FI, PtrVT);
639 Ops[0] = TFI;
640 Ops[1] = Imm0;
637641 n_ops = 2;
638642 } else {
639 DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with Ar32 $sp, "
640 << FI
641 << "\n");
642643 NewOpc = SPU::Ar32;
643 Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT);
644 Ops[1] = CurDAG->getConstant(FI, PtrVT);
644 Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType());
645 Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, Op.getValueType(),
646 TFI, Imm0), 0);
645647 n_ops = 2;
646648 }
647649 } else if (Opc == ISD::ZERO_EXTEND) {
659661 Ops[1] = Op1.getOperand(1);
660662 n_ops = 2;
661663 }
664 }
665 } else if (Opc == ISD::SHL) {
666 if (OpVT == MVT::i64) {
667 return SelectSHLi64(Op, OpVT);
668 }
669 } else if (Opc == ISD::SRL) {
670 if (OpVT == MVT::i64) {
671 return SelectSRLi64(Op, OpVT);
672 }
673 } else if (Opc == ISD::SRA) {
674 if (OpVT == MVT::i64) {
675 return SelectSRAi64(Op, OpVT);
662676 }
663677 } else if (Opc == SPUISD::LDRESULT) {
664678 // Custom select instructions for LDRESULT
712726 n_ops = 2;
713727 }
714728 }
715
729
716730 if (n_ops > 0) {
717731 if (N->hasOneUse())
718732 return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
722736 return SelectCode(Op);
723737 }
724738
725 /// createPPCISelDag - This pass converts a legalized DAG into a
739 /*!
740 * Emit the instruction sequence for i64 left shifts. The basic algorithm
741 * is to fill the bottom two word slots with zeros so that zeros are shifted
742 * in as the entire quadword is shifted left.
743 *
744 * \note This code could also be used to implement v2i64 shl.
745 *
746 * @param Op The shl operand
747 * @param OpVT Op's machine value value type (doesn't need to be passed, but
748 * makes life easier.)
749 * @return The SDNode with the entire instruction sequence
750 */
751 SDNode *
752 SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) {
753 SDValue Op0 = Op.getOperand(0);
754 MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
755 SDValue ShiftAmt = Op.getOperand(1);
756 MVT ShiftAmtVT = ShiftAmt.getValueType();
757 SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
758 SDValue SelMaskVal;
759
760 VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0);
761 SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
762 SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT, SelMaskVal);
763 ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, VecVT,
764 CurDAG->getTargetConstant(0, OpVT));
765 VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, VecVT,
766 SDValue(ZeroFill, 0),
767 SDValue(VecOp0, 0),
768 SDValue(SelMask, 0));
769
770 if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) {
771 unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
772 unsigned bits = unsigned(CN->getZExtValue()) & 7;
773
774 if (bytes > 0) {
775 Shift =
776 CurDAG->getTargetNode(SPU::SHLQBYIv2i64, VecVT,
777 SDValue(VecOp0, 0),
778 CurDAG->getTargetConstant(bytes, ShiftAmtVT));
779 }
780
781 if (bits > 0) {
782 Shift =
783 CurDAG->getTargetNode(SPU::SHLQBIIv2i64, VecVT,
784 SDValue((Shift != 0 ? Shift : VecOp0), 0),
785 CurDAG->getTargetConstant(bits, ShiftAmtVT));
786 }
787 } else {
788 SDNode *Bytes =
789 CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT,
790 ShiftAmt,
791 CurDAG->getTargetConstant(3, ShiftAmtVT));
792 SDNode *Bits =
793 CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT,
794 ShiftAmt,
795 CurDAG->getTargetConstant(7, ShiftAmtVT));
796 Shift =
797 CurDAG->getTargetNode(SPU::SHLQBYv2i64, VecVT,
798 SDValue(VecOp0, 0), SDValue(Bytes, 0));
799 Shift =
800 CurDAG->getTargetNode(SPU::SHLQBIv2i64, VecVT,
801 SDValue(Shift, 0), SDValue(Bits, 0));
802 }
803
804 return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
805 }
806
807 /*!
808 * Emit the instruction sequence for i64 logical right shifts.
809 *
810 * @param Op The shl operand
811 * @param OpVT Op's machine value value type (doesn't need to be passed, but
812 * makes life easier.)
813 * @return The SDNode with the entire instruction sequence
814 */
815 SDNode *
816 SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) {
817 SDValue Op0 = Op.getOperand(0);
818 MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
819 SDValue ShiftAmt = Op.getOperand(1);
820 MVT ShiftAmtVT = ShiftAmt.getValueType();
821 SDNode *VecOp0, *Shift = 0;
822
823 VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0);
824
825 if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) {
826 unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
827 unsigned bits = unsigned(CN->getZExtValue()) & 7;
828
829 if (bytes > 0) {
830 Shift =
831 CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, VecVT,
832 SDValue(VecOp0, 0),
833 CurDAG->getTargetConstant(bytes, ShiftAmtVT));
834 }
835
836 if (bits > 0) {
837 Shift =
838 CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, VecVT,
839 SDValue((Shift != 0 ? Shift : VecOp0), 0),
840 CurDAG->getTargetConstant(bits, ShiftAmtVT));
841 }
842 } else {
843 SDNode *Bytes =
844 CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT,
845 ShiftAmt,
846 CurDAG->getTargetConstant(3, ShiftAmtVT));
847 SDNode *Bits =
848 CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT,
849 ShiftAmt,
850 CurDAG->getTargetConstant(7, ShiftAmtVT));
851
852 // Ensure that the shift amounts are negated!
853 Bytes = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT,
854 SDValue(Bytes, 0),
855 CurDAG->getTargetConstant(0, ShiftAmtVT));
856
857 Bits = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT,
858 SDValue(Bits, 0),
859 CurDAG->getTargetConstant(0, ShiftAmtVT));
860
861 Shift =
862 CurDAG->getTargetNode(SPU::ROTQMBYv2i64, VecVT,
863 SDValue(VecOp0, 0), SDValue(Bytes, 0));
864 Shift =
865 CurDAG->getTargetNode(SPU::ROTQMBIv2i64, VecVT,
866 SDValue(Shift, 0), SDValue(Bits, 0));
867 }
868
869 return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
870 }
871
872 /*!
873 * Emit the instruction sequence for i64 arithmetic right shifts.
874 *
875 * @param Op The shl operand
876 * @param OpVT Op's machine value value type (doesn't need to be passed, but
877 * makes life easier.)
878 * @return The SDNode with the entire instruction sequence
879 */
880 SDNode *
881 SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
882 // Promote Op0 to vector
883 MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
884 SDValue ShiftAmt = Op.getOperand(1);
885 MVT ShiftAmtVT = ShiftAmt.getValueType();
886
887 SDNode *VecOp0 =
888 CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op.getOperand(0));
889
890 SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
891 SDNode *SignRot =
892 CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, MVT::v2i64,
893 SDValue(VecOp0, 0), SignRotAmt);
894 SDNode *UpperHalfSign =
895 CurDAG->getTargetNode(SPU::ORi32_v4i32, MVT::i32, SDValue(SignRot, 0));
896
897 SDNode *UpperHalfSignMask =
898 CurDAG->getTargetNode(SPU::FSM64r32, VecVT, SDValue(UpperHalfSign, 0));
899 SDNode *UpperLowerMask =
900 CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT,
901 CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
902 SDNode *UpperLowerSelect =
903 CurDAG->getTargetNode(SPU::SELBv2i64, VecVT,
904 SDValue(UpperHalfSignMask, 0),
905 SDValue(VecOp0, 0),
906 SDValue(UpperLowerMask, 0));
907
908 SDNode *Shift = 0;
909
910 if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) {
911 unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
912 unsigned bits = unsigned(CN->getZExtValue()) & 7;
913
914 if (bytes > 0) {
915 bytes = 31 - bytes;
916 Shift =
917 CurDAG->getTargetNode(SPU::ROTQBYIv2i64, VecVT,
918 SDValue(UpperLowerSelect, 0),
919 CurDAG->getTargetConstant(bytes, ShiftAmtVT));
920 }
921
922 if (bits > 0) {
923 bits = 8 - bits;
924 Shift =
925 CurDAG->getTargetNode(SPU::ROTQBIIv2i64, VecVT,
926 SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
927 CurDAG->getTargetConstant(bits, ShiftAmtVT));
928 }
929 } else {
930 SDNode *NegShift =
931 CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT,
932 ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
933
934 Shift =
935 CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, VecVT,
936 SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
937 Shift =
938 CurDAG->getTargetNode(SPU::ROTQBIv2i64, VecVT,
939 SDValue(Shift, 0), SDValue(NegShift, 0));
940 }
941
942 return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
943 }
944
945 /// createSPUISelDag - This pass converts a legalized DAG into a
726946 /// SPU-specific DAG, ready for instruction scheduling.
727947 ///
728948 FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
203203 setOperationAction(ISD::SRL, MVT::i8, Custom);
204204 setOperationAction(ISD::SRA, MVT::i8, Custom);
205205
206 // SPU needs custom lowering for shift left/right for i64
207 setOperationAction(ISD::SHL, MVT::i64, Custom);
208 setOperationAction(ISD::SRL, MVT::i64, Custom);
209 setOperationAction(ISD::SRA, MVT::i64, Custom);
206 // Make these operations legal and handle them during instruction selection:
207 setOperationAction(ISD::SHL, MVT::i64, Legal);
208 setOperationAction(ISD::SRL, MVT::i64, Legal);
209 setOperationAction(ISD::SRA, MVT::i64, Legal);
210210
211211 // Custom lower i8, i32 and i64 multiplications
212212 setOperationAction(ISD::MUL, MVT::i8, Custom);
214214 setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
215215
216216 // Need to custom handle (some) common i8, i64 math ops
217 setOperationAction(ISD::ADD, MVT::i8, Custom);
217218 setOperationAction(ISD::ADD, MVT::i64, Custom);
218219 setOperationAction(ISD::SUB, MVT::i8, Custom);
219220 setOperationAction(ISD::SUB, MVT::i64, Custom);
248249 // Zero extension and sign extension for i64 have to be
249250 // custom legalized
250251 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
251 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
252252 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
253253
254254 // Custom lower i128 -> i64 truncates
261261 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
262262
263263 // FDIV on SPU requires custom lowering
264 setOperationAction(ISD::FDIV, MVT::f32, Custom);
265264 setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
266265
267266 // SPU has [U|S]INT_TO_FP
339338 setOperationAction(ISD::ADD , VT, Legal);
340339 setOperationAction(ISD::SUB , VT, Legal);
341340 // mul has to be custom lowered.
342 setOperationAction(ISD::MUL , VT, Custom);
341 // TODO: v2i64 vector multiply
342 setOperationAction(ISD::MUL , VT, Legal);
343343
344344 setOperationAction(ISD::AND , VT, Legal);
345345 setOperationAction(ISD::OR , VT, Legal);
353353 setOperationAction(ISD::SREM, VT, Expand);
354354 setOperationAction(ISD::UDIV, VT, Expand);
355355 setOperationAction(ISD::UREM, VT, Expand);
356 setOperationAction(ISD::FDIV, VT, Custom);
357356
358357 // Custom lower build_vector, constant pool spills, insert and
359358 // extract vector elements:
370369 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
371370 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
372371
373 // FIXME: This is only temporary until I put all vector multiplications in
374 // SPUInstrInfo.td:
375 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
372 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
376373
377374 setShiftAmountType(MVT::i32);
378375 setBooleanContents(ZeroOrNegativeOneBooleanContent);
410407 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
411408 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
412409 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
413 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
414 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
415 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
416 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
417410 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
418411 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
419412 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
421414 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
422415 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
423416 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
424 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
425 "SPUISD::ROTQUAD_RZ_BYTES";
426 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
427 "SPUISD::ROTQUAD_RZ_BITS";
428 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
429 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
430 "SPUISD::ROTBYTES_LEFT_BITS";
431417 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
432418 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
433419 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
434420 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
435421 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
436422 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
437 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
438 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
439423 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
440424 }
441425
19211905 return SDValue();
19221906 }
19231907
1924 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1925 switch (Op.getValueType().getSimpleVT()) {
1926 default:
1927 cerr << "CellSPU: Unknown vector multiplication, got "
1928 << Op.getValueType().getMVTString()
1929 << "\n";
1930 abort();
1931 /*NOTREACHED*/
1932
1933 case MVT::v4i32:
1934 break;
1935
1936 // Multiply two v8i16 vectors (pipeline friendly version):
1937 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1938 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1939 // c) Use SELB to select upper and lower halves from the intermediate results
1940 //
1941 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1942 // dual-issue. This code does manage to do this, even if it's a little on
1943 // the wacky side
1944 case MVT::v8i16: {
1945 MachineFunction &MF = DAG.getMachineFunction();
1946 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1947 SDValue Chain = Op.getOperand(0);
1948 SDValue rA = Op.getOperand(0);
1949 SDValue rB = Op.getOperand(1);
1950 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1951 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1952
1953 SDValue FSMBOp =
1954 DAG.getCopyToReg(Chain, FSMBIreg,
1955 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1956 DAG.getConstant(0xcccc, MVT::i16)));
1957
1958 SDValue HHProd =
1959 DAG.getCopyToReg(FSMBOp, HiProdReg,
1960 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1961
1962 SDValue HHProd_v4i32 =
1963 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1964 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1965
1966 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1967 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1968 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1969 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1970 HHProd_v4i32,
1971 DAG.getConstant(16, MVT::i16))),
1972 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1973 }
1974
1975 // This M00sE is N@stI! (apologies to Monty Python)
1976 //
1977 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1978 // is to break it all apart, sign extend, and reassemble the various
1979 // intermediate products.
1980 case MVT::v16i8: {
1981 SDValue rA = Op.getOperand(0);
1982 SDValue rB = Op.getOperand(1);
1983 SDValue c8 = DAG.getConstant(8, MVT::i32);
1984 SDValue c16 = DAG.getConstant(16, MVT::i32);
1985
1986 SDValue LLProd =
1987 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1990
1991 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1992
1993 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1994
1995 SDValue LHProd =
1996 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1997 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1998
1999 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
2000 DAG.getConstant(0x2222, MVT::i16));
2001
2002 SDValue LoProdParts =
2003 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2004 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2005 LLProd, LHProd, FSMBmask));
2006
2007 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
2008
2009 SDValue LoProd =
2010 DAG.getNode(ISD::AND, MVT::v4i32,
2011 LoProdParts,
2012 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2013 LoProdMask, LoProdMask,
2014 LoProdMask, LoProdMask));
2015
2016 SDValue rAH =
2017 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2018 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2019
2020 SDValue rBH =
2021 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2022 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2023
2024 SDValue HLProd =
2025 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2026 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2027 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2028
2029 SDValue HHProd_1 =
2030 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2031 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2032 DAG.getNode(SPUISD::VEC_SRA,
2033 MVT::v4i32, rAH, c8)),
2034 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2035 DAG.getNode(SPUISD::VEC_SRA,
2036 MVT::v4i32, rBH, c8)));
2037
2038 SDValue HHProd =
2039 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2040 HLProd,
2041 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2042 FSMBmask);
2043
2044 SDValue HiProd =
2045 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2046
2047 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2048 DAG.getNode(ISD::OR, MVT::v4i32,
2049 LoProd, HiProd));
2050 }
2051 }
2052
2053 return SDValue();
2054 }
2055
2056 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2057 MachineFunction &MF = DAG.getMachineFunction();
2058 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2059
2060 SDValue A = Op.getOperand(0);
2061 SDValue B = Op.getOperand(1);
2062 MVT VT = Op.getValueType();
2063
2064 unsigned VRegBR, VRegC;
2065
2066 if (VT == MVT::f32) {
2067 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2068 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2069 } else {
2070 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2071 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2072 }
2073 // TODO: make sure we're feeding FPInterp the right arguments
2074 // Right now: fi B, frest(B)
2075
2076 // Computes BRcpl =
2077 // (Floating Interpolate (FP Reciprocal Estimate B))
2078 SDValue BRcpl =
2079 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2080 DAG.getNode(SPUISD::FPInterp, VT, B,
2081 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2082
2083 // Computes A * BRcpl and stores in a temporary register
2084 SDValue AxBRcpl =
2085 DAG.getCopyToReg(BRcpl, VRegC,
2086 DAG.getNode(ISD::FMUL, VT, A,
2087 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2088 // What's the Chain variable do? It's magic!
2089 // TODO: set Chain = Op(0).getEntryNode()
2090
2091 return DAG.getNode(ISD::FADD, VT,
2092 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2093 DAG.getNode(ISD::FMUL, VT,
2094 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2095 DAG.getNode(ISD::FSUB, VT, A,
2096 DAG.getNode(ISD::FMUL, VT, B,
2097 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2098 }
2099
21001908 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
21011909 MVT VT = Op.getValueType();
21021910 SDValue N = Op.getOperand(0);
22952103 assert(0 && "Unhandled i8 math operator");
22962104 /*NOTREACHED*/
22972105 break;
2106 case ISD::ADD: {
2107 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2108 // the result:
2109 SDValue N1 = Op.getOperand(1);
2110 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2111 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2112 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2113 DAG.getNode(Opc, MVT::i16, N0, N1));
2114
2115 }
2116
22982117 case ISD::SUB: {
22992118 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
23002119 // the result:
23012120 SDValue N1 = Op.getOperand(1);
2302 N0 = (N0.getOpcode() != ISD::Constant
2303 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2304 : DAG.getConstant(cast(N0)->getSExtValue(),
2305 MVT::i16));
2306 N1 = (N1.getOpcode() != ISD::Constant
2307 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2308 : DAG.getConstant(cast(N1)->getSExtValue(),
2309 MVT::i16));
2121 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2122 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
23102123 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
23112124 DAG.getNode(Opc, MVT::i16, N0, N1));
23122125 }
23962209
23972210 switch (Opc) {
23982211 case ISD::ZERO_EXTEND:
2399 case ISD::SIGN_EXTEND:
24002212 case ISD::ANY_EXTEND: {
24012213 MVT Op0VT = Op0.getValueType();
24022214 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
24092221 SDValue PromoteScalar =
24102222 DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
24112223
2412 if (Opc != ISD::SIGN_EXTEND) {
2413 // Use a shuffle to zero extend the i32 to i64 directly:
2414 SDValue shufMask =
2415 DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2416 DAG.getConstant(0x80808080, MVT::i32),
2417 DAG.getConstant(0x00010203, MVT::i32),
2418 DAG.getConstant(0x80808080, MVT::i32),
2419 DAG.getConstant(0x08090a0b, MVT::i32));
2420 SDValue zextShuffle =
2421 DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2422 PromoteScalar, PromoteScalar, shufMask);
2423
2424 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2425 DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
2426 } else {
2427 // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
2428 // right and propagate the sign bit) instruction.
2429 SDValue RotQuad =
2430 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
2431 PromoteScalar, DAG.getConstant(4, MVT::i32));
2432 SDValue SignQuad =
2433 DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
2434 PromoteScalar, DAG.getConstant(32, MVT::i32));
2435 SDValue SelMask =
2436 DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
2437 DAG.getConstant(0xf0f0, MVT::i16));
2438 SDValue CombineQuad =
2439 DAG.getNode(SPUISD::SELB, Op0VecVT,
2440 SignQuad, RotQuad, SelMask);
2441
2442 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2443 DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
2444 }
2224 // Use a shuffle to zero extend the i32 to i64 directly:
2225 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2226 DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203,
2227 MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(
2228 0x08090a0b, MVT::i32));
2229 SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar,
2230 PromoteScalar, shufMask);
2231
2232 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT,
2233 VecVT, zextShuffle));
24452234 }
24462235
24472236 case ISD::ADD: {
25002289 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
25012290 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
25022291 Op0, Op1, ShiftedBorrow));
2503 }
2504
2505 case ISD::SHL: {
2506 SDValue ShiftAmt = Op.getOperand(1);
2507 MVT ShiftAmtVT = ShiftAmt.getValueType();
2508 SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0);
2509 SDValue MaskLower =
2510 DAG.getNode(SPUISD::SELB, VecVT,
2511 Op0Vec,
2512 DAG.getConstant(0, VecVT),
2513 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2514 DAG.getConstant(0xff00ULL, MVT::i16)));
2515 SDValue ShiftAmtBytes =
2516 DAG.getNode(ISD::SRL, ShiftAmtVT,
2517 ShiftAmt,
2518 DAG.getConstant(3, ShiftAmtVT));
2519 SDValue ShiftAmtBits =
2520 DAG.getNode(ISD::AND, ShiftAmtVT,
2521 ShiftAmt,
2522 DAG.getConstant(7, ShiftAmtVT));
2523
2524 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2525 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2526 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2527 MaskLower, ShiftAmtBytes),
2528 ShiftAmtBits));
2529 }
2530
2531 case ISD::SRL: {
2532 MVT VT = Op.getValueType();
2533 SDValue ShiftAmt = Op.getOperand(1);
2534 MVT ShiftAmtVT = ShiftAmt.getValueType();
2535 SDValue ShiftAmtBytes =
2536 DAG.getNode(ISD::SRL, ShiftAmtVT,
2537 ShiftAmt,
2538 DAG.getConstant(3, ShiftAmtVT));
2539 SDValue ShiftAmtBits =
2540 DAG.getNode(ISD::AND, ShiftAmtVT,
2541 ShiftAmt,
2542 DAG.getConstant(7, ShiftAmtVT));
2543
2544 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2545 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2546 Op0, ShiftAmtBytes),
2547 ShiftAmtBits);
2548 }
2549
2550 case ISD::SRA: {
2551 // Promote Op0 to vector
2552 SDValue Op0 =
2553 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
2554 SDValue ShiftAmt = Op.getOperand(1);
2555 MVT ShiftVT = ShiftAmt.getValueType();
2556
2557 // Negate variable shift amounts
2558 if (!isa(ShiftAmt)) {
2559 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2560 DAG.getConstant(0, ShiftVT), ShiftAmt);
2561 }
2562
2563 SDValue UpperHalfSign =
2564 DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
2565 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2566 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2567 Op0, DAG.getConstant(31, MVT::i32))));
2568 SDValue UpperHalfSignMask =
2569 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2570 SDValue UpperLowerMask =
2571 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2572 DAG.getConstant(0xff00, MVT::i16));
2573 SDValue UpperLowerSelect =
2574 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2575 UpperHalfSignMask, Op0, UpperLowerMask);
2576 SDValue RotateLeftBytes =
2577 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2578 UpperLowerSelect, ShiftAmt);
2579 SDValue RotateLeftBits =
2580 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2581 RotateLeftBytes, ShiftAmt);
2582
2583 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2584 RotateLeftBits);
25852292 }
25862293 }
25872294
28892596 return LowerRET(Op, DAG, getTargetMachine());
28902597
28912598
2599 case ISD::ZERO_EXTEND:
2600 case ISD::ANY_EXTEND:
2601 return LowerI64Math(Op, DAG, Opc);
2602
28922603 // i8, i64 math ops:
2893 case ISD::ZERO_EXTEND:
2894 case ISD::SIGN_EXTEND:
2895 case ISD::ANY_EXTEND:
28962604 case ISD::ADD:
28972605 case ISD::SUB:
28982606 case ISD::ROTR:
29272635
29282636 // Vector and i8 multiply:
29292637 case ISD::MUL:
2930 if (VT.isVector())
2931 return LowerVectorMUL(Op, DAG);
2932 else if (VT == MVT::i8)
2638 if (VT == MVT::i8)
29332639 return LowerI8Math(Op, DAG, Opc, *this);
2934
2935 case ISD::FDIV:
2936 if (VT == MVT::f32 || VT == MVT::v4f32)
2937 return LowerFDIVf32(Op, DAG);
2938 #if 0
2939 // This is probably a libcall
2940 else if (Op.getValueType() == MVT::f64)
2941 return LowerFDIVf64(Op, DAG);
2942 #endif
2943 else
2944 assert(0 && "Calling FDIV on unsupported MVT");
29452640
29462641 case ISD::CTPOP:
29472642 return LowerCTPOP(Op, DAG);
31182813 case SPUISD::VEC_SHL:
31192814 case SPUISD::VEC_SRL:
31202815 case SPUISD::VEC_SRA:
3121 case SPUISD::ROTQUAD_RZ_BYTES:
3122 case SPUISD::ROTQUAD_RZ_BITS:
31232816 case SPUISD::ROTBYTES_LEFT: {
31242817 SDValue Op1 = N->getOperand(1);
31252818
32672960 }
32682961
32692962 #if 0
3270 case MPY:
3271 case MPYU:
3272 case MPYH:
3273 case MPYHH:
32742963 case SPUISD::SHLQUAD_L_BITS:
32752964 case SPUISD::SHLQUAD_L_BYTES:
32762965 case SPUISD::VEC_SHL:
32782967 case SPUISD::VEC_SRA:
32792968 case SPUISD::VEC_ROTL:
32802969 case SPUISD::VEC_ROTR:
3281 case SPUISD::ROTQUAD_RZ_BYTES:
3282 case SPUISD::ROTQUAD_RZ_BITS:
32832970 case SPUISD::ROTBYTES_LEFT:
32842971 case SPUISD::SELECT_MASK:
32852972 case SPUISD::SELB:
3286 case SPUISD::FPInterp:
3287 case SPUISD::FPRecipEst:
32882973 case SPUISD::SEXT32TO64:
32892974 #endif
32902975 }
32912976 }
3292
2977
32932978 unsigned
32942979 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
32952980 unsigned Depth) const {
2323 enum NodeType {
2424 // Start the numbering where the builting ops and target ops leave off.
2525 FIRST_NUMBER = ISD::BUILTIN_OP_END,
26
26
2727 // Pseudo instructions:
2828 RET_FLAG, ///< Return with flag, matched by bi instruction
29
29
3030 Hi, ///< High address component (upper 16)
3131 Lo, ///< Low address component (lower 16)
3232 PCRelAddr, ///< Program counter relative address
4040 CNTB, ///< Count leading ones in bytes
4141 PREFSLOT2VEC, ///< Promote scalar->vector
4242 VEC2PREFSLOT, ///< Extract element 0
43 MPY, ///< 16-bit Multiply (low parts of a 32-bit)
44 MPYU, ///< Multiply Unsigned
45 MPYH, ///< Multiply High
46 MPYHH, ///< Multiply High-High
4743 SHLQUAD_L_BITS, ///< Rotate quad left, by bits
4844 SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes
4945 VEC_SHL, ///< Vector shift left
5147 VEC_SRA, ///< Vector shift right (arithmetic)
5248 VEC_ROTL, ///< Vector rotate left
5349 VEC_ROTR, ///< Vector rotate right
54 ROTQUAD_RZ_BYTES, ///< Rotate quad right, by bytes, zero fill
55 ROTQUAD_RZ_BITS, ///< Rotate quad right, by bits, zero fill
5650 ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
5751 ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
5852 SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
6256 CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
6357 SUB_EXTENDED, ///< Subtract extended, with borrow
6458 BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED
65 FPInterp, ///< Floating point interpolate
66 FPRecipEst, ///< Floating point reciprocal estimate
6759 SEXT32TO64, ///< Sign-extended 32-bit const -> 64-bits
6860 LAST_SPUISD ///< Last user-defined instruction
6961 };
8678 }
8779
8880 class SPUTargetMachine; // forward dec'l.
89
81
9082 class SPUTargetLowering :
9183 public TargetLowering
9284 {
9688
9789 public:
9890 SPUTargetLowering(SPUTargetMachine &TM);
99
91
10092 /// getTargetNodeName() - This method returns the name of a target specific
10193 /// DAG node.
10294 virtual const char *getTargetNodeName(unsigned Opcode) const;
10395
10496 /// getSetCCResultType - Return the ValueType for ISD::SETCC
10597 virtual MVT getSetCCResultType(const SDValue &) const;
106
98
10799 //! Custom lowering hooks
108100 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
109101
115107
116108 virtual void computeMaskedBitsForTargetNode(const SDValue Op,
117109 const APInt &Mask,
118 APInt &KnownZero,
110 APInt &KnownZero,
119111 APInt &KnownOne,
120112 const SelectionDAG &DAG,
121113 unsigned Depth = 0) const;
125117
126118 ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
127119
128 std::pair
120 std::pair
129121 getRegForInlineAsmConstraint(const std::string &Constraint,
130122 MVT VT) const;
131123
132124 void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
133 bool hasMemory,
125 bool hasMemory,
134126 std::vector &Ops,
135127 SelectionDAG &DAG) const;
136128
8181 case SPU::ORIi8i32:
8282 case SPU::AHIvec:
8383 case SPU::AHIr16:
84 case SPU::AIvec:
84 case SPU::AIv4i32:
8585 assert(MI.getNumOperands() == 3 &&
8686 MI.getOperand(0).isReg() &&
8787 MI.getOperand(1).isReg() &&
9797 assert(MI.getNumOperands() == 3 &&
9898 "wrong number of operands to AIr32");
9999 if (MI.getOperand(0).isReg() &&
100 (MI.getOperand(1).isReg() ||
101 MI.getOperand(1).isFI()) &&
100 MI.getOperand(1).isReg() &&
102101 (MI.getOperand(2).isImm() &&
103102 MI.getOperand(2).getImm() == 0)) {
104103 sourceReg = MI.getOperand(1).getReg();
264263 // reg class to any other reg class containing R3. This is required because
265264 // we instruction select bitconvert i64 -> f64 as a noop for example, so our
266265 // types have no specific meaning.
267
266
268267 if (DestRC == SPU::R8CRegisterClass) {
269268 BuildMI(MBB, MI, get(SPU::ORBIr8), DestReg).addReg(SrcReg).addImm(0);
270269 } else if (DestRC == SPU::R16CRegisterClass) {
290289 // Attempt to copy unknown/unsupported register class!
291290 return false;
292291 }
293
292
294293 return true;
295294 }
296295
463462 unsigned OpNum = Ops[0];
464463 unsigned Opc = MI->getOpcode();
465464 MachineInstr *NewMI = 0;
466
465
467466 if ((Opc == SPU::ORr32
468467 || Opc == SPU::ORv4i32)
469468 && MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
507506
508507 // Get the last instruction in the block.
509508 MachineInstr *LastInst = I;
510
509
511510 // If there is only one terminator instruction, process it.
512511 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
513512 if (isUncondBranch(LastInst)) {
523522 // Otherwise, don't know what this is.
524523 return true;
525524 }
526
525
527526 // Get the instruction before it if it's a terminator.
528527 MachineInstr *SecondLastInst = I;
529528
531530 if (SecondLastInst && I != MBB.begin() &&
532531 isUnpredicatedTerminator(--I))
533532 return true;
534
533
535534 // If the block ends with a conditional and unconditional branch, handle it.
536535 if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
537536 TBB = SecondLastInst->getOperand(1).getMBB();
540539 FBB = LastInst->getOperand(0).getMBB();
541540 return false;
542541 }
543
542
544543 // If the block ends with two unconditional branches, handle it. The second
545544 // one is not executed, so remove it.
546545 if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
553552 // Otherwise, can't handle this.
554553 return true;
555554 }
556
555
557556 unsigned
558557 SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
559558 MachineBasicBlock::iterator I = MBB.end();
577576 I->eraseFromParent();
578577 return 2;
579578 }
580
579
581580 unsigned
582581 SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
583582 MachineBasicBlock *FBB,
584583 const SmallVectorImpl &Cond) const {
585584 // Shouldn't be a fall through.
586585 assert(TBB && "InsertBranch must not be told to insert a fallthrough");
587 assert((Cond.size() == 2 || Cond.size() == 0) &&
586 assert((Cond.size() == 2 || Cond.size() == 0) &&
588587 "SPU branch conditions have two components!");
589
588
590589 // One-way branch.
591590 if (FBB == 0) {
592591 if (Cond.empty()) // Unconditional branch
599598 }
600599 return 1;
601600 }
602
601
603602 // Two-way Conditional Branch.
604603 #if 0
605604 BuildMI(&MBB, get(SPU::BRNZ))
582582 def AHIr16:
583583 RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
584584 "ahi\t$rT, $rA, $val", IntegerOp,
585 [(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>;
585 [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
586
587 // v4i32, i32 add instruction:
586588
587589 class AInst pattern>:
588590 RRForm<0b00000011000, OOL, IOL,
603605 def v16i8: AVecInst;
604606
605607 def r32: ARegInst;
606 def r8: AInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), [/* no pattern */]>;
607608 }
608609
609610 defm A : AddInstruction;
610611
611 def AIvec:
612 RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
613 "ai\t$rT, $rA, $val", IntegerOp,
614 [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA),
615 v4i32SExt10Imm:$val))]>;
616
617 def AIr32:
618 RI10Form<0b00111000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
619 "ai\t$rT, $rA, $val", IntegerOp,
620 [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>;
612 class AIInst pattern>:
613 RI10Form<0b00111000, OOL, IOL,
614 "ai\t$rT, $rA, $val", IntegerOp,
615 pattern>;
616
617 class AIVecInst:
618 AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
619 [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
620
621 class AIFPVecInst:
622 AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
623 [/* no pattern */]>;
624
625 class AIRegInst:
626 AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
627 [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
628
629 // This is used to add epsilons to floating point numbers in the f32 fdiv code:
630 class AIFPInst:
631 AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
632 [/* no pattern */]>;
633
634 multiclass AddImmediate {
635 def v4i32: AIVecInst;
636
637 def r32: AIRegInst;
638
639 def v4f32: AIFPVecInst;
640 def f32: AIFPInst;
641 }
642
643 defm AI : AddImmediate;
621644
622645 def SFHvec:
623646 RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
794817 def MPYv8i16:
795818 RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
796819 "mpy\t$rT, $rA, $rB", IntegerMulDiv,
797 [(set (v8i16 VECREG:$rT), (SPUmpy_vec (v8i16 VECREG:$rA),
798 (v8i16 VECREG:$rB)))]>;
820 [/* no pattern */]>;
799821
800822 def MPYr16:
801823 RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
811833
812834 def MPYUv4i32:
813835 MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
814 [(set (v4i32 VECREG:$rT),
815 (SPUmpyu_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
836 [/* no pattern */]>;
816837
817838 def MPYUr16:
818839 MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
820841
821842 def MPYUr32:
822843 MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
823 [(set R32C:$rT, (SPUmpyu_int R32C:$rA, R32C:$rB))]>;
844 [/* no pattern */]>;
824845
825846 // mpyi: multiply 16 x s10imm -> 32 result.
826847
891912
892913 def MPYHv4i32:
893914 MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
894 [(set (v4i32 VECREG:$rT),
895 (SPUmpyh_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
915 [/* no pattern */]>;
896916
897917 def MPYHr32:
898918 MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
899 [(set R32C:$rT, (SPUmpyh_int R32C:$rA, R32C:$rB))]>;
919 [/* no pattern */]>;
900920
901921 // mpys: multiply high and shift right (returns the top half of
902922 // a 16-bit multiply, sign extended to 32 bits.)
923
924 class MPYSInst:
925 RRForm<0b11100011110, OOL, IOL,
926 "mpys\t$rT, $rA, $rB", IntegerMulDiv,
927 [/* no pattern */]>;
928
903929 def MPYSvec:
904 RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
905 "mpys\t$rT, $rA, $rB", IntegerMulDiv,
906 []>;
907
930 MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
931
908932 def MPYSr16:
909 RRForm<0b11100011110, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
910 "mpys\t$rT, $rA, $rB", IntegerMulDiv,
911 []>;
933 MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
912934
913935 // mpyhh: multiply high-high (returns the 32-bit result from multiplying
914936 // the top 16 bits of the $rA, $rB)
937
938 class MPYHHInst:
939 RRForm<0b01100011110, OOL, IOL,
940 "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
941 [/* no pattern */]>;
942
915943 def MPYHHv8i16:
916 RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
917 "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
918 [(set (v8i16 VECREG:$rT),
919 (SPUmpyhh_vec (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
944 MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
920945
921946 def MPYHHr32:
922 RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
923 "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
924 []>;
947 MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
925948
926949 // mpyhha: Multiply high-high, add to $rT:
950
951 class MPYHHAInst:
952 RRForm<0b01100010110, OOL, IOL,
953 "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
954 [/* no pattern */]>;
955
927956 def MPYHHAvec:
928 RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
929 "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
930 []>;
931
957 MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
958
932959 def MPYHHAr32:
933 RRForm<0b01100010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
934 "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
935 []>;
960 MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
936961
937962 // mpyhhu: Multiply high-high, unsigned
963
964 class MPYHHUInst:
965 RRForm<0b01110011110, OOL, IOL,
966 "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
967 [/* no pattern */]>;
968
938969 def MPYHHUvec:
939 RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
940 "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
941 []>;
942
970 MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
971
943972 def MPYHHUr32:
944 RRForm<0b01110011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
945 "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
946 []>;
973 MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
947974
948975 // mpyhhau: Multiply high-high, unsigned
976
977 class MPYHHAUInst:
978 RRForm<0b01110010110, OOL, IOL,
979 "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
980 [/* no pattern */]>;
981
949982 def MPYHHAUvec:
950 RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
951 "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
952 []>;
953
983 MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
984
954985 def MPYHHAUr32:
955 RRForm<0b01110010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
956 "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
957 []>;
958
959 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
960 // v4i32, i32 multiply instruction sequence:
961 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
962 def MPYv4i32:
963 Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
964 (Av4i32
965 (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
966 (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
967 (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
968
969 def MPYi32:
970 Pat<(mul R32C:$rA, R32C:$rB),
971 (Ar32
972 (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
973 (MPYHr32 R32C:$rB, R32C:$rA)),
974 (MPYUr32 R32C:$rA, R32C:$rB))>;
986 MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
975987
976988 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
977989 // clz: Count leading zeroes
982994
983995 class CLZRegInst:
984996 CLZInst<(outs rclass:$rT), (ins rclass:$rA),
985 [(set rclass:$rT, (ctlz rclass:$rA))]>;
997 [(set rclass:$rT, (ctlz rclass:$rA))]>;
986998
987999 class CLZVecInst:
9881000 CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
14231435 def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
14241436 [/* no pattern */]>;
14251437
1426 // scalar->vector promotion:
1438 // scalar->vector promotion, prefslot2vec:
14271439 def v16i8_i8: ORPromoteScalar;
14281440 def v8i16_i16: ORPromoteScalar;
14291441 def v4i32_i32: ORPromoteScalar;
14311443 def v4f32_f32: ORPromoteScalar;
14321444 def v2f64_f64: ORPromoteScalar;
14331445
1434 // extract element 0:
1446 // vector->scalar demotion, vec2prefslot:
14351447 def i8_v16i8: ORExtractElt;
14361448 def i16_v8i16: ORExtractElt;
14371449 def i32_v4i32: ORExtractElt;
18301842 (and (vnot (vectype VECREG:$rC)),
18311843 (vectype VECREG:$rA))))]>;
18321844
1845 class SELBVecVCondInst:
1846 SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
1847 [(set (vectype VECREG:$rT),
1848 (select (vectype VECREG:$rC),
1849 (vectype VECREG:$rB),
1850 (vectype VECREG:$rA)))]>;
1851
18331852 class SELBVecCondInst:
18341853 SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
18351854 [(set (vectype VECREG:$rT),
18661885 def v4i32_cond: SELBVecCondInst;
18671886 def v2i64_cond: SELBVecCondInst;
18681887
1888 def v16i8_vcond: SELBVecCondInst;
1889 def v8i16_vcond: SELBVecCondInst;
1890 def v4i32_vcond: SELBVecCondInst;
1891 def v2i64_vcond: SELBVecCondInst;
1892
1893 def v4f32_cond:
1894 SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
1895 [(set (v4f32 VECREG:$rT),
1896 (select (v4i32 VECREG:$rC),
1897 (v4f32 VECREG:$rB),
1898 (v4f32 VECREG:$rA)))]>;
1899
18691900 // SELBr64_cond is defined further down, look for i64 comparisons
18701901 def r32_cond: SELBRegCondInst;
1902 def f32_cond: SELBRegCondInst;
18711903 def r16_cond: SELBRegCondInst;
18721904 def r8_cond: SELBRegCondInst;
18731905 }
24532485 RotateShift, pattern>;
24542486
24552487 class ROTQBIVecInst:
2456 ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
2488 ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
24572489 [/* no pattern yet */]>;
24582490
24592491 class ROTQBIRegInst:
2460 ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
2492 ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
24612493 [/* no pattern yet */]>;
24622494
24632495 multiclass RotateQuadByBitCount
26442676 // ROTQMBYvec: This is a vector form merely so that when used in an
26452677 // instruction pattern, type checking will succeed. This instruction assumes
26462678 // that the user knew to negate $rB.
2647 //
2648 // Using the SPUrotquad_rz_bytes target-specific DAG node, the patterns
2649 // ensure that $rB is negated.
26502679 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
26512680
26522681 class ROTQMBYInst pattern>:
26592688
26602689 class ROTQMBYRegInst:
26612690 ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
2662 [(set rclass:$rT,
2663 (SPUrotquad_rz_bytes rclass:$rA, R32C:$rB))]>;
2691 [/* no pattern */]>;
26642692
26652693 multiclass RotateQuadBytes
26662694 {
26752703
26762704 defm ROTQMBY : RotateQuadBytes;
26772705
2678 def : Pat<(SPUrotquad_rz_bytes (v16i8 VECREG:$rA), R32C:$rB),
2679 (ROTQMBYv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
2680 def : Pat<(SPUrotquad_rz_bytes (v8i16 VECREG:$rA), R32C:$rB),
2681 (ROTQMBYv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
2682 def : Pat<(SPUrotquad_rz_bytes (v4i32 VECREG:$rA), R32C:$rB),
2683 (ROTQMBYv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
2684 def : Pat<(SPUrotquad_rz_bytes (v2i64 VECREG:$rA), R32C:$rB),
2685 (ROTQMBYv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
2686 def : Pat<(SPUrotquad_rz_bytes GPRC:$rA, R32C:$rB),
2687 (ROTQMBYr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>;
2688 def : Pat<(SPUrotquad_rz_bytes R64C:$rA, R32C:$rB),
2689 (ROTQMBYr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>;
2690
26912706 class ROTQMBYIInst pattern>:
26922707 RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
26932708 RotateShift, pattern>;
26942709
26952710 class ROTQMBYIVecInst:
26962711 ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
2697 [(set (vectype VECREG:$rT),
2698 (SPUrotquad_rz_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
2712 [/* no pattern */]>;
26992713
27002714 class ROTQMBYIRegInst:
27012715 ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
2702 [(set rclass:$rT,
2703 (SPUrotquad_rz_bytes rclass:$rA, (inttype pred:$val)))]>;
2716 [/* no pattern */]>;
27042717
27052718 multiclass RotateQuadBytesImm
27062719 {
27242737 RotateShift, pattern>;
27252738
27262739 class ROTQMBYBIVecInst:
2727 ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
2728 [/* no pattern, intrinsic? */]>;
2740 ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
2741 [/* no pattern, */]>;
27292742
27302743 multiclass RotateMaskQuadByBitCount
27312744 {
27672780
27682781 defm ROTQMBI: RotateMaskQuadByBits;
27692782
2770 def : Pat<(SPUrotquad_rz_bits (v16i8 VECREG:$rA), R32C:$rB),
2771 (ROTQMBIv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
2772 def : Pat<(SPUrotquad_rz_bits (v8i16 VECREG:$rA), R32C:$rB),
2773 (ROTQMBIv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
2774 def : Pat<(SPUrotquad_rz_bits (v4i32 VECREG:$rA), R32C:$rB),
2775 (ROTQMBIv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
2776 def : Pat<(SPUrotquad_rz_bits (v2i64 VECREG:$rA), R32C:$rB),
2777 (ROTQMBIv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
2778 def : Pat<(SPUrotquad_rz_bits GPRC:$rA, R32C:$rB),
2779 (ROTQMBIr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>;
2780 def : Pat<(SPUrotquad_rz_bits R64C:$rA, R32C:$rB),
2781 (ROTQMBIr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>;
2782
27832783 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
27842784 // Rotate quad and mask by bits, immediate
27852785 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
27902790
27912791 class ROTQMBIIVecInst:
27922792 ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
2793 [(set (vectype VECREG:$rT),
2794 (SPUrotquad_rz_bits (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
2793 [/* no pattern */]>;
27952794
27962795 class ROTQMBIIRegInst:
27972796 ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
2798 [(set rclass:$rT,
2799 (SPUrotquad_rz_bits rclass:$rA, (i32 uimm7:$val)))]>;
2797 [/* no pattern */]>;
28002798
28012799 multiclass RotateMaskQuadByBitsImm
28022800 {
31413139
31423140 def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
31433141 [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
3142
3143 // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
3144 def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
3145 [(set (v4i32 VECREG:$rT),
3146 (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
3147 (v4i32 v4i32SExt16Imm:$val)))]>;
3148
3149 def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
3150 [/* no pattern */]>;
31443151 }
31453152
31463153 class CLGTBInst pattern> :
37493756
37503757 class FAInst pattern>:
37513758 RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
3752 SPrecFP, pattern>;
3759 SPrecFP, pattern>;
37533760
37543761 class FAVecInst:
37553762 FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
37563763 [(set (vectype VECREG:$rT),
3757 (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
3764 (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
37583765
37593766 multiclass SFPAdd
37603767 {
37613768 def v4f32: FAVecInst;
3762 def r32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
3763 [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
3769 def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
3770 [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
37643771 }
37653772
37663773 defm FA : SFPAdd;
37673774
37683775 class FSInst pattern>:
37693776 RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
3770 SPrecFP, pattern>;
3777 SPrecFP, pattern>;
37713778
37723779 class FSVecInst:
37733780 FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
3774 [(set (vectype VECREG:$rT),
3775 (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
3781 [(set (vectype VECREG:$rT),
3782 (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
37763783
37773784 multiclass SFPSub
37783785 {
37793786 def v4f32: FSVecInst;
3780 def r32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
3781 [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
3787 def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
3788 [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
37823789 }
37833790
37843791 defm FS : SFPSub;
37853792
37863793 // Floating point reciprocal estimate
3787 def FREv4f32 :
3788 RRForm_1<0b00011101100, (outs VECREG:$rT), (ins VECREG:$rA),
3789 "frest\t$rT, $rA", SPrecFP,
3790 [(set (v4f32 VECREG:$rT), (SPUreciprocalEst (v4f32 VECREG:$rA)))]>;
3791
3792 def FREf32 :
3793 RRForm_1<0b00011101100, (outs R32FP:$rT), (ins R32FP:$rA),
3794 "frest\t$rT, $rA", SPrecFP,
3795 [(set R32FP:$rT, (SPUreciprocalEst R32FP:$rA))]>;
3794
3795 class FRESTInst:
3796 RRForm_1<0b00110111000, OOL, IOL,
3797 "frest\t$rT, $rA", SPrecFP,
3798 [/* no pattern */]>;
3799
3800 def FRESTv4f32 :
3801 FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
3802
3803 def FRESTf32 :
3804 FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
37963805
37973806 // Floating point interpolate (used in conjunction with reciprocal estimate)
37983807 def FIv4f32 :
37993808 RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
38003809 "fi\t$rT, $rA, $rB", SPrecFP,
3801 [(set (v4f32 VECREG:$rT), (SPUinterpolate (v4f32 VECREG:$rA),
3802 (v4f32 VECREG:$rB)))]>;
3810 [/* no pattern */]>;
38033811
38043812 def FIf32 :
38053813 RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
38063814 "fi\t$rT, $rA, $rB", SPrecFP,
3807 [(set R32FP:$rT, (SPUinterpolate R32FP:$rA, R32FP:$rB))]>;
3815 [/* no pattern */]>;
38083816
38093817 //--------------------------------------------------------------------------
38103818 // Basic single precision floating point comparisons:
44444452 (SPUlo tconstpool:$in, 0)),
44454453 (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
44464454
4455 /*
44474456 def : Pat<(SPUindirect R32C:$sp, i32ImmSExt10:$imm),
44484457 (AIr32 R32C:$sp, i32ImmSExt10:$imm)>;
44494458
44504459 def : Pat<(SPUindirect R32C:$sp, imm:$imm),
44514460 (Ar32 R32C:$sp,
44524461 (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm)))>;
4462 */
44534463
44544464 def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
44554465 (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
44654475
44664476 // Instrinsics:
44674477 include "CellSDKIntrinsics.td"
4478 // Various math operator instruction sequences
4479 include "SPUMathInstr.td"
44684480 // 64-bit "instructions"/support
44694481 include "SPU64InstrInfo.td"
0 //======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
1 //
2 // Cell SPU math operations
3 //
4 // This target description file contains instruction sequences for various
5 // math operations, such as vector multiplies, i32 multiply, etc., for the
6 // SPU's i32, i16 i8 and corresponding vector types.
7 //
8 // Any resemblance to libsimdmath or the Cell SDK simdmath library is
9 // purely and completely coincidental.
10 //
11 // Primary author: Scott Michel (scottm@aero.org)
12 //===----------------------------------------------------------------------===//
13
14 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
15 // v16i8 multiply instruction sequence:
16 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
17
18 def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
19 (ORv4i32
20 (ANDv4i32
21 (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
22 (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
23 (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
24 (FSMBIv8i16 0x2222)),
25 (ILAv4i32 0x0000ffff)),
26 (SHLIv4i32
27 (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
28 (ROTMAIv4i32_i32 VECREG:$rB, 16)),
29 (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
30 (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
31 (FSMBIv8i16 0x2222)), 16))>;
32
33 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
34 // v8i16 multiply instruction sequence:
35 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
36
37 def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
38 (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
39 (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
40 (FSMBIv8i16 0xcccc))>;
41
42 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
43 // v4i32, i32 multiply instruction sequence:
44 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
45
46 def MPYv4i32:
47 Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
48 (Av4i32
49 (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
50 (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
51 (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
52
53 def MPYi32:
54 Pat<(mul R32C:$rA, R32C:$rB),
55 (Ar32
56 (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
57 (MPYHr32 R32C:$rB, R32C:$rA)),
58 (MPYUr32 R32C:$rA, R32C:$rB))>;
59
60 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
61 // f32, v4f32 divide instruction sequence:
62 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
63
64 // Reciprocal estimate and interpolation
65 def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
66 // Division estimate
67 def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
68 // Newton-Raphson iteration
69 def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
70 Interpf32.Fragment,
71 DivEstf32.Fragment)>;
72 // Epsilon addition
73 def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
74
75 def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
76 (SELBf32_cond NRaphf32.Fragment,
77 Epsilonf32.Fragment,
78 (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
79
80 // Reciprocal estimate and interpolation
81 def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
82 // Division estimate
83 def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
84 // Newton-Raphson iteration
85 def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
86 (v4f32 VECREG:$rB),
87 (v4f32 VECREG:$rA)),
88 Interpv4f32.Fragment,
89 DivEstv4f32.Fragment)>;
90 // Epsilon addition
91 def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
92
93 def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
94 (SELBv4f32_cond NRaphv4f32.Fragment,
95 Epsilonv4f32.Fragment,
96 (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
97 Epsilonv4f32.Fragment,
98 (v4f32 VECREG:$rA)), -1))>;
8686 // SPUISelLowering.h):
8787 def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
8888
// NOTE(review): these SPUISD::MPY* SDNode wrappers are the lines being
// DELETED by this change — i32/v4i32 multiply is now expanded with
// ordinary Pat<> patterns (MPYi32/MPYv4i32 in SPUMathInstr.td) instead
// of custom target-dependent ISD nodes, per this commit's goal of
// eliminating target-dependent ISD enums.
89 // SPU 16-bit multiply
90 def SPUmpy_vec: SDNode<"SPUISD::MPY", SPUVecBinop, []>;
91
92 // SPU multiply unsigned, used in instruction lowering for v4i32
93 // multiplies:
94 def SPUmpyu_vec: SDNode<"SPUISD::MPYU", SPUVecBinop, []>;
95 def SPUmpyu_int: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>;
96
97 // SPU 16-bit multiply high x low, shift result 16-bits
98 // Used to compute intermediate products for 32-bit multiplies
99 def SPUmpyh_vec: SDNode<"SPUISD::MPYH", SPUVecBinop, []>;
100 def SPUmpyh_int: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>;
101
102 // SPU 16-bit multiply high x high, 32-bit product
103 // Used to compute intermediate products for 16-bit multiplies
104 def SPUmpyhh_vec: SDNode<"SPUISD::MPYHH", SPUVecBinop, []>;
105 def SPUmpyhh_int: SDNode<"SPUISD::MPYHH", SDTIntBinOp, []>;
106
10789 // Shift left quadword by bits and bytes
10890 def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
10991 def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
11597
11698 def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
11799 def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
118
119 def SPUrotquad_rz_bytes: SDNode<"SPUISD::ROTQUAD_RZ_BYTES",
120 SPUvecshift_type, []>;
121 def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS",
122 SPUvecshift_type, []>;
123100
124101 // Vector rotate left, bits shifted out of the left are rotated in on the right
125102 def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
139116
140117 // SPU gather bits instruction:
141118 def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>;
142
143 // SPU floating point interpolate
144 def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>;
145
146 // SPU floating point reciprocal estimate (used for fdiv)
147 def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>;
148119
149120 def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
150121 def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
237237 SPU::R0, /* link register */
238238 0 /* end */
239239 };
240
240
241241 return SPU_CalleeSaveRegs;
242242 }
243243
267267 &SPU::GPRCRegClass, /* link register */
268268 0 /* end */
269269 };
270
270
271271 return SPU_CalleeSaveRegClasses;
272272 }
273273
338338 // Now add the frame object offset to the offset from r1.
339339 int Offset = MFI->getObjectOffset(FrameIndex);
340340
341 // Most instructions, except for generated FrameIndex additions using AIr32,
342 // have the immediate in operand 1. AIr32, in this case, has the immediate
343 // in operand 2.
344 unsigned OpNo = (MI.getOpcode() != SPU::AIr32 ? 1 : 2);
341 // Most instructions, except for generated FrameIndex additions using AIr32
342 // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
343 // immediate in operand 2.
344 unsigned OpNo = 1;
345 if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
346 OpNo = 2;
347
345348 MachineOperand &MO = MI.getOperand(OpNo);
346349
347350 // Offset is biased by $lr's slot at the bottom.
354357 if (Offset > SPUFrameInfo::maxFrameOffset()
355358 || Offset < SPUFrameInfo::minFrameOffset()) {
356359 cerr << "Large stack adjustment ("
357 << Offset
360 << Offset
358361 << ") in SPURegisterInfo::eliminateFrameIndex.";
359362 } else {
360363 MO.ChangeToImmediate(Offset);
370373
371374 // Get the number of bytes to allocate from the FrameInfo
372375 unsigned FrameSize = MFI->getStackSize();
373
376
374377 // Get the alignments provided by the target, and the maximum alignment
375378 // (if any) of the fixed frame objects.
376379 unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
380383
381384 // Get the maximum call frame size of all the calls.
382385 unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
383
386
384387 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
385388 // that allocations will be aligned.
386389 if (MFI->hasVarSizedObjects())
388391
389392 // Update maximum call frame size.
390393 MFI->setMaxCallFrameSize(maxCallFrameSize);
391
394
392395 // Include call frame size in total.
393396 FrameSize += maxCallFrameSize;
394397
417420 MachineBasicBlock::iterator MBBI = MBB.begin();
418421 MachineFrameInfo *MFI = MF.getFrameInfo();
419422 MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
420
423
421424 // Prepare for debug frame info.
422425 bool hasDebugInfo = MMI && MMI->hasDebugInfo();
423426 unsigned FrameLabelId = 0;
424
427
425428 // Move MBBI back to the beginning of the function.
426429 MBBI = MBB.begin();
427
430
428431 // Work out frame sizes.
429432 determineFrameLayout(MF);
430433 int FrameSize = MFI->getStackSize();
431
434
432435 assert((FrameSize & 0xf) == 0
433436 && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
434437
439442 FrameLabelId = MMI->NextLabelID();
440443 BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(FrameLabelId);
441444 }
442
445
443446 // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
444447 // for the ABI
445448 BuildMI(MBB, MBBI, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
475478 cerr << "Unhandled frame size: " << FrameSize << "\n";
476479 abort();
477480 }
478
481
479482 if (hasDebugInfo) {
480483 std::vector &Moves = MMI->getFrameMoves();
481
484
482485 // Show update of SP.
483486 MachineLocation SPDst(MachineLocation::VirtualFP);
484487 MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
485488 Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
486
489
487490 // Add callee saved registers to move list.
488491 const std::vector &CSI = MFI->getCalleeSavedInfo();
489492 for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
494497 MachineLocation CSSrc(Reg);
495498 Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
496499 }
497
500
498501 // Mark effective beginning of when frame pointer is ready.
499502 unsigned ReadyLabelId = MMI->NextLabelID();
500503 BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(ReadyLabelId);
501
504
502505 MachineLocation FPDst(SPU::R1);
503506 MachineLocation FPSrc(MachineLocation::VirtualFP);
504507 Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
11 ; RUN: grep frest %t1.s | count 2
22 ; RUN: grep -w fi %t1.s | count 2
3 ; RUN: grep fm %t1.s | count 4
3 ; RUN: grep -w fm %t1.s | count 2
44 ; RUN: grep fma %t1.s | count 2
5 ; RUN: grep fnms %t1.s | count 2
5 ; RUN: grep fnms %t1.s | count 4
6 ; RUN: grep cgti %t1.s | count 2
7 ; RUN: grep selb %t1.s | count 2
68 ;
79 ; This file includes standard floating point arithmetic instructions
810 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep {fsmbi.*61680} %t1.s | count 1
2 ; RUN: grep rotqmbyi %t1.s | count 1
3 ; RUN: grep rotmai %t1.s | count 1
4 ; RUN: grep selb %t1.s | count 1
1 ; RUN: grep xswd %t1.s | count 1
52 ; RUN: grep shufb %t1.s | count 2
63 ; RUN: grep cg %t1.s | count 1
74 ; RUN: grep addx %t1.s | count 1
77 ; RUN: grep and %t1.s | count 2
88 ; RUN: grep selb %t1.s | count 6
99 ; RUN: grep fsmbi %t1.s | count 4
10 ; RUN: grep shli %t1.s | count 2
10 ; RUN: grep shli %t1.s | count 4
1111 ; RUN: grep shlhi %t1.s | count 4
1212 ; RUN: grep ila %t1.s | count 2
1313 ; RUN: grep xsbh %t1.s | count 4
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep shlh %t1.s | count 84
2 ; RUN: grep shlhi %t1.s | count 51
3 ; RUN: grep shl %t1.s | count 168
4 ; RUN: grep shli %t1.s | count 51
5 ; RUN: grep xshw %t1.s | count 5
6 ; RUN: grep and %t1.s | count 5
1 ; RUN: grep -w shlh %t1.s | count 9
2 ; RUN: grep -w shlhi %t1.s | count 3
3 ; RUN: grep -w shl %t1.s | count 9
4 ; RUN: grep -w shli %t1.s | count 3
5 ; RUN: grep -w xshw %t1.s | count 5
6 ; RUN: grep -w and %t1.s | count 5
7 ; RUN: grep -w andi %t1.s | count 2
8 ; RUN: grep -w rotmi %t1.s | count 2
9 ; RUN: grep -w rotqmbyi %t1.s | count 1
10 ; RUN: grep -w rotqmbii %t1.s | count 2
11 ; RUN: grep -w rotqmby %t1.s | count 1
12 ; RUN: grep -w rotqmbi %t1.s | count 1
13 ; RUN: grep -w rotqbyi %t1.s | count 1
14 ; RUN: grep -w rotqbii %t1.s | count 2
15 ; RUN: grep -w rotqbybi %t1.s | count 1
16 ; RUN: grep -w sfi %t1.s | count 3
17
718 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
819 target triple = "spu"
920
209220 %A = shl i32 0, %arg1
210221 ret i32 %A
211222 }
223
; New coverage for i64 shl/lshr/ashr, made legal on SPU by this change.
; Each operation is tested with two immediate amounts (9 = one byte plus
; one bit, 3 = bits only — presumably chosen to exercise the byte- and
; bit-granularity quad-shift instructions separately; confirm against
; the rotqmbyi/rotqmbii RUN greps above) and one variable amount.
224 ;; i64 shift left
225
226 define i64 @shl_i64_1(i64 %arg1) {
227 %A = shl i64 %arg1, 9
228 ret i64 %A
229 }
230
231 define i64 @shl_i64_2(i64 %arg1) {
232 %A = shl i64 %arg1, 3
233 ret i64 %A
234 }
235
; Variable shift amount: zero-extended from i32, exercising the
; register-operand shift forms.
236 define i64 @shl_i64_3(i64 %arg1, i32 %shift) {
237 %1 = zext i32 %shift to i64
238 %2 = shl i64 %arg1, %1
239 ret i64 %2
240 }
241
242 ;; i64 shift right logical (zeros shifted in from the left)
243
244 define i64 @lshr_i64_1(i64 %arg1) {
245 %1 = lshr i64 %arg1, 9
246 ret i64 %1
247 }
248
249 define i64 @lshr_i64_2(i64 %arg1) {
250 %1 = lshr i64 %arg1, 3
251 ret i64 %1
252 }
253
254 define i64 @lshr_i64_3(i64 %arg1, i32 %shift) {
255 %1 = zext i32 %shift to i64
256 %2 = lshr i64 %arg1, %1
257 ret i64 %2
258 }
259
260 ;; i64 shift right arithmetic (sign bit replicated in from the left)
261
262 define i64 @ashr_i64_1(i64 %arg) {
263 %1 = ashr i64 %arg, 9
264 ret i64 %1
265 }
266
267 define i64 @ashr_i64_2(i64 %arg) {
268 %1 = ashr i64 %arg, 3
269 ret i64 %1
270 }
271
272 define i64 @ashr_i64_3(i64 %arg1, i32 %shift) {
273 %1 = zext i32 %shift to i64
274 %2 = ashr i64 %arg1, %1
275 ret i64 %2
276 }
3333 { "neq", i64_neq, i64_neq_select }
3434 };
3535
/* 64-bit shift helpers for the execution test. Each of shl/srl/sra is
 * covered with a compile-time-constant amount and a run-time-variable
 * amount, since the backend presumably selects different instruction
 * sequences for immediate vs. register shift counts (see the RUN greps
 * in the matching .ll test). */
36 uint64_t i64_shl_const(uint64_t a) {
37 return a << 10;
38 }
39
40 uint64_t i64_shl(uint64_t a, int amt) {
41 return a << amt;
42 }
43
44 uint64_t i64_srl_const(uint64_t a) {
45 return a >> 10;
46 }
47
48 uint64_t i64_srl(uint64_t a, int amt) {
49 return a >> amt;
50 }
51
/* NOTE(review): right-shifting a negative int64_t is implementation-
 * defined in C; main() feeds these a negative value (f), so the test
 * assumes arithmetic shift behavior — confirm that is intended. */
52 int64_t i64_sra_const(int64_t a) {
53 return a >> 10;
54 }
55
56 int64_t i64_sra(int64_t a, int amt) {
57 return a >> amt;
58 }
59
59
3660 int main(void) {
3761 int i;
38 int64_t a = 1234567890000LL;
39 int64_t b = 2345678901234LL;
40 int64_t c = 1234567890001LL;
41 int64_t d = 10001LL;
42 int64_t e = 10000LL;
62 int64_t a = 1234567890003LL;
63 int64_t b = 2345678901235LL;
64 int64_t c = 1234567890001LL;
65 int64_t d = 10001LL;
66 int64_t e = 10000LL;
67 int64_t f = -1068103409991LL;
4368
4469 printf("a = %16lld (0x%016llx)\n", a, a);
4570 printf("b = %16lld (0x%016llx)\n", b, b);
4671 printf("c = %16lld (0x%016llx)\n", c, c);
4772 printf("d = %16lld (0x%016llx)\n", d, d);
4873 printf("e = %16lld (0x%016llx)\n", e, e);
74 printf("f = %16lld (0x%016llx)\n", f, f);
4975 printf("----------------------------------------\n");
5076
5177 for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) {
6389 printf("----------------------------------------\n");
6490 }
6591
92 printf("a = 0x%016llx\n", a);
93 printf("i64_shl_const(a) = 0x%016llx\n", i64_shl_const(a));
94 printf("i64_shl(a) = 0x%016llx\n", i64_shl(a, 5));
95 printf("i64_srl_const(a) = 0x%016llx\n", i64_srl_const(a));
96 printf("i64_srl(a) = 0x%016llx\n", i64_srl(a, 5));
97 printf("i64_sra_const(a) = 0x%016llx\n", i64_sra_const(a));
98 printf("i64_sra(a) = 0x%016llx\n", i64_sra(a, 5));
99 printf("----------------------------------------\n");
100
101 printf("f = 0x%016llx\n", f);
102 printf("i64_shl_const(f) = 0x%016llx\n", i64_shl_const(f));
103 printf("i64_shl(f) = 0x%016llx\n", i64_shl(f, 10));
104 printf("i64_srl_const(f) = 0x%016llx\n", i64_srl_const(f));
105 printf("i64_srl(f) = 0x%016llx\n", i64_srl(f, 10));
106 printf("i64_sra_const(f) = 0x%016llx\n", i64_sra_const(f));
107 printf("i64_sra(f) = 0x%016llx\n", i64_sra(f, 10));
108 printf("----------------------------------------\n");
109
66110 return 0;
67111 }