llvm.org GIT mirror llvm / 9de5d0d
More CellSPU refinement and progress:

- Cleaned up custom load/store logic; common code is now shared [see note below], and cleaned up address modes
- More test cases: various intrinsics, structure element access (load/store test), updated target data strings, indirect function calls.

Note: This patch contains a refactoring of the LoadSDNode and StoreSDNode structures: they now share a common base class, LSBaseSDNode, that provides an interface to their common functionality. There is some hackery to access the proper operand depending on the derived class; doing a proper job would require finding and rearranging the SDOperands sent to StoreSDNode's constructor. The current refactor errs on the side of being conservative and backward compatible, while providing functionality that reduces redundant code for targets where loads and stores are custom-lowered.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45851 91177308-0d34-0410-b5e6-96231b3b80d8

Scott Michel, 12 years ago
32 changed files with 929 additions and 423 deletions.
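The LSBaseSDNode refactoring described in the note above can be sketched as follows (editor's illustration, not part of the patch; the helper name LowerLoadStore is hypothetical). It shows how a target's custom lowering can now service both ISD::LOAD and ISD::STORE through one interface instead of duplicating the logic per node type:

    // Minimal sketch, assuming the LSBaseSDNode interface introduced below.
    static SDOperand LowerLoadStore(SDOperand Op, SelectionDAG &DAG) {
      LSBaseSDNode *LSN = cast<LSBaseSDNode>(Op.Val);
      SDOperand Chain   = LSN->getChain();    // operand 0 for both node kinds
      SDOperand BasePtr = LSN->getBasePtr();  // operand 1 (load) or 2 (store)
      SDOperand Offset  = LSN->getOffset();   // operand 2 (load) or 3 (store)

      if (LSN->getOpcode() == ISD::STORE) {
        SDOperand Value = LSN->getValue();    // stores also carry a value
        // ... target-specific store lowering using Chain/BasePtr/Value ...
      }
      // ... target-specific load lowering using Chain/BasePtr/Offset ...
      return SDOperand();  // empty SDOperand: fall back to default handling
    }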
14531453 }
14541454 };
14551455
1456 /// LSBaseSDNode - Base class for LoadSDNode and StoreSDNode
1457 ///
1458 class LSBaseSDNode : public SDNode {
1459 private:
1460 //! SrcValue - Memory location for alias analysis.
1461 const Value *SrcValue;
1462
1463 //! SVOffset - Memory location offset.
1464 int SVOffset;
1465
1466 //! Alignment - Alignment of memory location in bytes.
1467 unsigned Alignment;
1468
1469 //! IsVolatile - True if the load or store is volatile.
1470 bool IsVolatile;
1471 protected:
1472 //! Operand array for load and store
1473 /*!
1474 \note Moving this array to the base class captures more
1475 common functionality shared between LoadSDNode and
1476 StoreSDNode
1477 */
1478 SDOperand Ops[4];
1479 public:
1480 LSBaseSDNode(ISD::NodeType NodeTy, SDVTList VTs, const Value *SV, int SVO,
1481 unsigned Align, bool Vol)
1482 : SDNode(NodeTy, VTs),
1483 SrcValue(SV), SVOffset(SVO), Alignment(Align), IsVolatile(Vol)
1484 { }
1485
1486 const SDOperand getChain() const {
1487 return getOperand(0);
1488 }
1489 const SDOperand getBasePtr() const {
1490 return getOperand(getOpcode() == ISD::LOAD ? 1 : 2);
1491 }
1492 const SDOperand getOffset() const {
1493 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
1494 }
1495 const SDOperand getValue() const {
1496 assert(getOpcode() == ISD::STORE);
1497 return getOperand(1);
1498 }
1499
1500 const Value *getSrcValue() const { return SrcValue; }
1501 int getSrcValueOffset() const { return SVOffset; }
1502 unsigned getAlignment() const { return Alignment; }
1503 bool isVolatile() const { return IsVolatile; }
1504
1505 static bool classof(const LSBaseSDNode *N) { return true; }
1506 static bool classof(const SDNode *N) { return true; }
1507 };
1508
14561509 /// LoadSDNode - This class is used to represent ISD::LOAD nodes.
14571510 ///
1458 class LoadSDNode : public SDNode {
1459 virtual void ANCHOR(); // Out-of-line virtual method to give class a home.
1460 SDOperand Ops[3];
1511 class LoadSDNode : public LSBaseSDNode {
1512 virtual void ANCHOR(); // Out-of-line virtual method to give class a home.
14611513
14621514 // AddrMode - unindexed, pre-indexed, post-indexed.
14631515 ISD::MemIndexedMode AddrMode;
14671519
14681520 // LoadedVT - VT of loaded value before extension.
14691521 MVT::ValueType LoadedVT;
1470
1471 // SrcValue - Memory location for alias analysis.
1472 const Value *SrcValue;
1473
1474 // SVOffset - Memory location offset.
1475 int SVOffset;
1476
1477 // Alignment - Alignment of memory location in bytes.
1478 unsigned Alignment;
1479
1480 // IsVolatile - True if the load is volatile.
1481 bool IsVolatile;
14821522 protected:
14831523 friend class SelectionDAG;
14841524 LoadSDNode(SDOperand *ChainPtrOff, SDVTList VTs,
14851525 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, MVT::ValueType LVT,
14861526 const Value *SV, int O=0, unsigned Align=0, bool Vol=false)
1487 : SDNode(ISD::LOAD, VTs),
1488 AddrMode(AM), ExtType(ETy), LoadedVT(LVT), SrcValue(SV), SVOffset(O),
1489 Alignment(Align), IsVolatile(Vol) {
1527 : LSBaseSDNode(ISD::LOAD, VTs, SV, O, Align, Vol),
1528 AddrMode(AM), ExtType(ETy), LoadedVT(LVT) {
14901529 Ops[0] = ChainPtrOff[0]; // Chain
14911530 Ops[1] = ChainPtrOff[1]; // Ptr
14921531 Ops[2] = ChainPtrOff[2]; // Off
14981537 }
14991538 public:
15001539
1501 const SDOperand getChain() const { return getOperand(0); }
1502 const SDOperand getBasePtr() const { return getOperand(1); }
1503 const SDOperand getOffset() const { return getOperand(2); }
15041540 ISD::MemIndexedMode getAddressingMode() const { return AddrMode; }
15051541 ISD::LoadExtType getExtensionType() const { return ExtType; }
15061542 MVT::ValueType getLoadedVT() const { return LoadedVT; }
1507 const Value *getSrcValue() const { return SrcValue; }
1508 int getSrcValueOffset() const { return SVOffset; }
1509 unsigned getAlignment() const { return Alignment; }
1510 bool isVolatile() const { return IsVolatile; }
15111543
15121544 static bool classof(const LoadSDNode *) { return true; }
1545 static bool classof(const LSBaseSDNode *N) { return true; }
15131546 static bool classof(const SDNode *N) {
15141547 return N->getOpcode() == ISD::LOAD;
15151548 }
15171550
15181551 /// StoreSDNode - This class is used to represent ISD::STORE nodes.
15191552 ///
1520 class StoreSDNode : public SDNode {
1521 virtual void ANCHOR(); // Out-of-line virtual method to give class a home.
1522 SDOperand Ops[4];
1553 class StoreSDNode : public LSBaseSDNode {
1554 virtual void ANCHOR(); // Out-of-line virtual method to give class a home.
15231555
15241556 // AddrMode - unindexed, pre-indexed, post-indexed.
15251557 ISD::MemIndexedMode AddrMode;
15291561
15301562 // StoredVT - VT of the value after truncation.
15311563 MVT::ValueType StoredVT;
1532
1533 // SrcValue - Memory location for alias analysis.
1534 const Value *SrcValue;
1535
1536 // SVOffset - Memory location offset.
1537 int SVOffset;
1538
1539 // Alignment - Alignment of memory location in bytes.
1540 unsigned Alignment;
1541
1542 // IsVolatile - True if the store is volatile.
1543 bool IsVolatile;
15441564 protected:
15451565 friend class SelectionDAG;
15461566 StoreSDNode(SDOperand *ChainValuePtrOff, SDVTList VTs,
15471567 ISD::MemIndexedMode AM, bool isTrunc, MVT::ValueType SVT,
15481568 const Value *SV, int O=0, unsigned Align=0, bool Vol=false)
1549 : SDNode(ISD::STORE, VTs),
1550 AddrMode(AM), IsTruncStore(isTrunc), StoredVT(SVT), SrcValue(SV),
1551 SVOffset(O), Alignment(Align), IsVolatile(Vol) {
1569 : LSBaseSDNode(ISD::STORE, VTs, SV, O, Align, Vol),
1570 AddrMode(AM), IsTruncStore(isTrunc), StoredVT(SVT) {
15521571 Ops[0] = ChainValuePtrOff[0]; // Chain
15531572 Ops[1] = ChainValuePtrOff[1]; // Value
15541573 Ops[2] = ChainValuePtrOff[2]; // Ptr
15611580 }
15621581 public:
15631582
1564 const SDOperand getChain() const { return getOperand(0); }
1565 const SDOperand getValue() const { return getOperand(1); }
1566 const SDOperand getBasePtr() const { return getOperand(2); }
1567 const SDOperand getOffset() const { return getOperand(3); }
15681583 ISD::MemIndexedMode getAddressingMode() const { return AddrMode; }
15691584 bool isTruncatingStore() const { return IsTruncStore; }
15701585 MVT::ValueType getStoredVT() const { return StoredVT; }
1571 const Value *getSrcValue() const { return SrcValue; }
1572 int getSrcValueOffset() const { return SVOffset; }
1573 unsigned getAlignment() const { return Alignment; }
1574 bool isVolatile() const { return IsVolatile; }
15751586
15761587 static bool classof(const StoreSDNode *) { return true; }
1588 static bool classof(const LSBaseSDNode *N) { return true; }
15771589 static bool classof(const SDNode *N) {
15781590 return N->getOpcode() == ISD::STORE;
15791591 }
1616 //===----------------------------------------------------------------------===//
1717
1818 // 7-bit integer type, used as an immediate:
19 def cell_i7_ty: LLVMType; // Note: This was i8
20 def cell_i8_ty: LLVMType; // Note: This was i8
19 def cell_i7_ty: LLVMType;
20 def cell_i8_ty: LLVMType;
2121
2222 class v16i8_u7imm :
2323 GCCBuiltin,
2626
2727 class v16i8_u8imm :
2828 GCCBuiltin,
29 Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
29 Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
3030 [IntrNoMem]>;
3131
3232 class v16i8_s10imm :
107107
108108 def CellSDKand:
109109 RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
110 "add\t $rT, $rA, $rB", IntegerOp,
110 "and\t $rT, $rA, $rB", IntegerOp,
111111 [(set (v4i32 VECREG:$rT),
112112 (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
113113
114114 def CellSDKandc:
115115 RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
116 "addc\t $rT, $rA, $rB", IntegerOp,
116 "andc\t $rT, $rA, $rB", IntegerOp,
117117 [(set (v4i32 VECREG:$rT),
118118 (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
119119
120120 def CellSDKandbi:
121 RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
121 RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
122122 "andbi\t $rT, $rA, $val", BranchResolv,
123123 [(set (v16i8 VECREG:$rT),
124124 (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
148148 (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
149149
150150 def CellSDKorbi:
151 RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
151 RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
152152 "orbi\t $rT, $rA, $val", BranchResolv,
153153 [(set (v16i8 VECREG:$rT),
154154 (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
172172 (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
173173
174174 def CellSDKxorbi:
175 RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
175 RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
176176 "xorbi\t $rT, $rA, $val", BranchResolv,
177177 [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
178178
247247 (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
248248
249249 def CellSDKceqbi:
250 RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
250 RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
251251 "ceqbi\t $rT, $rA, $val", BranchResolv,
252252 [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
253253
293293 (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
294294
295295 def CellSDKcgtbi:
296 RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
296 RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
297297 "cgtbi\t $rT, $rA, $val", BranchResolv,
298298 [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
299299
328328 (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
329329
330330 def CellSDKclgtbi:
331 RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
331 RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
332332 "clgtbi\t $rT, $rA, $val", BranchResolv,
333333 [(set (v16i8 VECREG:$rT),
334334 (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
383383 SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
384384 SDOperand &Index) {
385385 // These match the addr256k operand type:
386 MVT::ValueType OffsVT = MVT::i16;
386387 MVT::ValueType PtrVT = SPUtli.getPointerTy();
387 MVT::ValueType OffsVT = MVT::i16;
388388
389389 switch (N.getOpcode()) {
390390 case ISD::Constant:
391 case ISD::ConstantPool:
392 case ISD::GlobalAddress:
393 cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n";
394 abort();
395 /*NOTREACHED*/
396
391397 case ISD::TargetConstant: {
392398 // Loading from a constant address.
393399 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
399405 return true;
400406 }
401407 }
402 case ISD::ConstantPool:
403 case ISD::TargetConstantPool: {
404 // The constant pool address is N. Base is a dummy that will be ignored by
408 case ISD::TargetGlobalAddress:
409 case ISD::TargetConstantPool:
410 case SPUISD::AFormAddr: {
411 // The address is in Base. N is a dummy that will be ignored by
405412 // the assembly printer.
406 Base = N;
407 Index = CurDAG->getTargetConstant(0, OffsVT);
408 return true;
409 }
410
411 case ISD::GlobalAddress:
412 case ISD::TargetGlobalAddress: {
413 // The global address is N. Base is a dummy that is ignored by the
414 // assembly printer.
415413 Base = N;
416414 Index = CurDAG->getTargetConstant(0, OffsVT);
417415 return true;
442440 if (Opc == ISD::Register) {
443441 Base = N;
444442 Index = CurDAG->getTargetConstant(0, PtrTy);
443 return true;
444 } else if (Opc == ISD::FrameIndex) {
445 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
446 DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = "
447 << FI->getIndex() << "\n");
448 if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
449 Base = CurDAG->getTargetConstant(0, PtrTy);
450 Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
451 return true;
452 }
453 } else if (Opc == ISD::ADD) {
454 // Generated by getelementptr
455 const SDOperand Op0 = N.getOperand(0); // Frame index/base
456 const SDOperand Op1 = N.getOperand(1); // Offset within base
457
458 if (Op1.getOpcode() == ISD::Constant
459 || Op1.getOpcode() == ISD::TargetConstant) {
460 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
461 assert(CN != 0 && "SelectDFormAddr: Expected a constant");
462
463 int32_t offset = (int32_t) CN->getSignExtended();
464 unsigned Opc0 = Op0.getOpcode();
465
466 if ((offset & 0xf) != 0) {
467 // Unaligned offset: punt and let X-form address handle it.
468 // NOTE: This really doesn't have to be strictly 16-byte aligned,
469 // since the load/store quadword instructions will implicitly
470 // zero the lower 4 bits of the resulting address.
471 return false;
472 }
473
474 if (Opc0 == ISD::FrameIndex) {
475 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0);
476 DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
477 << " frame index = " << FI->getIndex() << "\n");
478
479 if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
480 Base = CurDAG->getTargetConstant(offset, PtrTy);
481 Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
482 return true;
483 }
484 } else if (offset > SPUFrameInfo::minFrameOffset()
485 && offset < SPUFrameInfo::maxFrameOffset()) {
486 Base = CurDAG->getTargetConstant(offset, PtrTy);
487 if (Opc0 == ISD::GlobalAddress) {
488 // Convert global address to target global address
489 GlobalAddressSDNode *GV = dyn_cast<GlobalAddressSDNode>(Op0);
490 Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy);
491 return true;
492 } else {
493 // Otherwise, just take operand 0
494 Index = Op0;
495 return true;
496 }
497 }
498 } else
499 return false;
500 } else if (Opc == SPUISD::DFormAddr) {
501 // D-Form address: This is pretty straightforward, naturally...
502 ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
503 assert(CN != 0 && "SelectDFormAddr/SPUISD::DFormAddr expecting constant");
504 Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy);
505 Index = N.getOperand(0);
445506 return true;
446507 } else if (Opc == ISD::FrameIndex) {
447508 // Stack frame index must be less than 512 (divided by 16):
453514 Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
454515 return true;
455516 }
456 } else if (Opc == ISD::ADD) {
457 // Generated by getelementptr
458 const SDOperand Op0 = N.getOperand(0); // Frame index/base
459 const SDOperand Op1 = N.getOperand(1); // Offset within base
460 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
461
462 // Not a constant?
463 if (CN == 0)
464 return false;
465
466 int32_t offset = (int32_t) CN->getSignExtended();
467 unsigned Opc0 = Op0.getOpcode();
468
469 if ((offset & 0xf) != 0) {
470 cerr << "SelectDFormAddr: unaligned offset = " << offset << "\n";
471 abort();
472 /*NOTREACHED*/
473 }
474
475 if (Opc0 == ISD::FrameIndex) {
476 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0);
477 DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
478 << " frame index = " << FI->getIndex() << "\n");
479
480 if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
481 Base = CurDAG->getTargetConstant(offset, PtrTy);
482 Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
483 return true;
484 }
485 } else if (offset > SPUFrameInfo::minFrameOffset()
486 && offset < SPUFrameInfo::maxFrameOffset()) {
487 Base = CurDAG->getTargetConstant(offset, PtrTy);
488 if (Opc0 == ISD::GlobalAddress) {
489 // Convert global address to target global address
490 GlobalAddressSDNode *GV = dyn_cast<GlobalAddressSDNode>(Op0);
491 Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy);
492 return true;
493 } else {
494 // Otherwise, just take operand 0
495 Index = Op0;
496 return true;
497 }
498 }
499 } else if (Opc == SPUISD::DFormAddr) {
500 // D-Form address: This is pretty straightforward, naturally...
501 ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
502 assert(CN != 0 && "SelectDFormAddr/SPUISD::DFormAddr expecting constant");
503 Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy);
504 Index = N.getOperand(0);
505 return true;
506517 }
507518
508519 return false;
534545 unsigned N2Opc = N2.getOpcode();
535546
536547 if ((N1Opc == SPUISD::Hi && N2Opc == SPUISD::Lo)
537 || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)) {
548 || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)
549 || (N1Opc == SPUISD::XFormAddr)) {
538550 Base = N.getOperand(0);
539551 Index = N.getOperand(1);
540552 return true;
547559 abort();
548560 /*UNREACHED*/
549561 }
562 } else if (Opc == SPUISD::XFormAddr) {
563 Base = N;
564 Index = N.getOperand(1);
565 return true;
550566 } else if (N.getNumOperands() == 2) {
551567 SDOperand N1 = N.getOperand(0);
552568 SDOperand N2 = N.getOperand(1);
590606 } else if (Opc == ISD::FrameIndex) {
591607 // Selects to AIr32 FI, 0 which in turn will become AIr32 SP, imm.
592608 int FI = cast<FrameIndexSDNode>(N)->getIndex();
593 SDOperand TFI = CurDAG->getTargetFrameIndex(FI, SPUtli.getPointerTy());
609 MVT::ValueType PtrVT = SPUtli.getPointerTy();
610 SDOperand Zero = CurDAG->getTargetConstant(0, PtrVT);
611 SDOperand TFI = CurDAG->getTargetFrameIndex(FI, PtrVT);
594612
595613 DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AI32 <FI>, 0\n");
596 return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI,
597 CurDAG->getTargetConstant(0, MVT::i32));
614 if (N->hasOneUse())
615 return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, Zero);
616 CurDAG->getTargetNode(SPU::AIr32, Op.getValueType(), TFI, Zero);
598617 } else if (Opc == SPUISD::LDRESULT) {
599618 // Custom select instructions for LDRESULT
600619 unsigned VT = N->getValueType(0);
8181 /*!
8282 \arg Op Operand to test
8383 \return true if the operand is a memory target (i.e., global
84 address, external symbol, constant pool) or an existing D-Form
84 address, external symbol, constant pool) or an A-form
8585 address.
8686 */
8787 bool isMemoryOperand(const SDOperand &Op)
8989 const unsigned Opc = Op.getOpcode();
9090 return (Opc == ISD::GlobalAddress
9191 || Opc == ISD::GlobalTLSAddress
92 || Opc == ISD::FrameIndex
92 /* || Opc == ISD::FrameIndex */
9393 || Opc == ISD::JumpTable
9494 || Opc == ISD::ConstantPool
9595 || Opc == ISD::ExternalSymbol
9696 || Opc == ISD::TargetGlobalAddress
9797 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetFrameIndex
98 /* || Opc == ISD::TargetFrameIndex */
9999 || Opc == ISD::TargetJumpTable
100100 || Opc == ISD::TargetConstantPool
101101 || Opc == ISD::TargetExternalSymbol
102 || Opc == SPUISD::DFormAddr);
102 || Opc == SPUISD::AFormAddr);
103103 }
104104 }
105105
355355 setOperationAction(ISD::OR, MVT::v16i8, Custom);
356356 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
357357 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
358
358
359359 setSetCCResultType(MVT::i32);
360360 setShiftAmountType(MVT::i32);
361361 setSetCCResultContents(ZeroOrOneSetCCResult);
376376 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
377377 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
378378 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
379 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
379380 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
380381 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
381382 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
429430 // LowerOperation implementation
430431 //===----------------------------------------------------------------------===//
431432
433 /// Aligned load common code for CellSPU
434 /*!
435 \param[in] Op The SelectionDAG load or store operand
436 \param[in] DAG The selection DAG
437 \param[in] ST CellSPU subtarget information structure
438 \param[in,out] alignment Caller initializes this to the load or store node's
439 value from getAlignment(), may be updated while generating the aligned load
440 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
441 offset (a multiple of 16)
442 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
443 offset of the preferred slot (modulo 16 != 0)
444 \param[in,out] VT Caller initializes this value type to the load or store
445 node's loaded or stored value type; may be updated for an i1-extended load or
446 store.
447 \param[out] was16aligned true if the base pointer had 16-byte alignment,
448 otherwise false. Can help to determine if the chunk needs to be rotated.
449
450 Both load and store lowering load a block of data aligned on a 16-byte
451 boundary. This is the common aligned load code shared between both; a
452 worked example follows the function body below. */
453 static SDOperand
454 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
455 LSBaseSDNode *LSN,
456 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
457 unsigned &VT, bool &was16aligned)
458 {
459 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
460 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
461 SDOperand basePtr = LSN->getBasePtr();
462 SDOperand chain = LSN->getChain();
463
464 if (basePtr.getOpcode() == ISD::ADD) {
465 SDOperand Op1 = basePtr.Val->getOperand(1);
466
467 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
468 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1));
469
470 alignOffs = (int) CN->getValue();
471 prefSlotOffs = (int) (alignOffs & 0xf);
472
473 // Adjust the rotation amount to ensure that the final result ends up in
474 // the preferred slot:
475 prefSlotOffs -= vtm->prefslot_byte;
476 basePtr = basePtr.getOperand(0);
477
478 // Modify alignment, since the ADD is likely from getElementPtr:
479 switch (basePtr.getOpcode()) {
480 case ISD::GlobalAddress:
481 case ISD::TargetGlobalAddress: {
482 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
483 const GlobalValue *GV = GN->getGlobal();
484 alignment = GV->getAlignment();
485 break;
486 }
487 }
488 } else {
489 alignOffs = 0;
490 prefSlotOffs = -vtm->prefslot_byte;
491 }
492 } else {
493 alignOffs = 0;
494 prefSlotOffs = -vtm->prefslot_byte;
495 }
496
497 if (alignment == 16) {
498 // Realign the base pointer as a D-Form address:
499 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
500 if (isMemoryOperand(basePtr)) {
501 SDOperand Zero = DAG.getConstant(0, PtrVT);
502 unsigned Opc = (!ST->usingLargeMem()
503 ? SPUISD::AFormAddr
504 : SPUISD::XFormAddr);
505 basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
506 }
507 basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
508 basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));
509 }
510
511 // Emit the vector load:
512 was16aligned = true;
513 return DAG.getLoad(MVT::v16i8, chain, basePtr,
514 LSN->getSrcValue(), LSN->getSrcValueOffset(),
515 LSN->isVolatile(), 16);
516 }
517
518 // Unaligned load or we're using the "large memory" model, which means that
519 // we have to be very pessimistic:
520 if (isMemoryOperand(basePtr)) {
521 basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
522 }
523
524 // Add the offset
525 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
526 was16aligned = false;
527 return DAG.getLoad(MVT::v16i8, chain, basePtr,
528 LSN->getSrcValue(), LSN->getSrcValueOffset(),
529 LSN->isVolatile(), 16);
530 }
531
432532 /// Custom lower loads for CellSPU
433533 /*!
434534 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
437537 static SDOperand
438538 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
439539 LoadSDNode *LN = cast<LoadSDNode>(Op);
440 SDOperand basep = LN->getBasePtr();
441540 SDOperand the_chain = LN->getChain();
442 MVT::ValueType BasepOpc = basep.Val->getOpcode();
443541 MVT::ValueType VT = LN->getLoadedVT();
444542 MVT::ValueType OpVT = Op.Val->getValueType(0);
445 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
446543 ISD::LoadExtType ExtType = LN->getExtensionType();
447544 unsigned alignment = LN->getAlignment();
448 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
449545 SDOperand Ops[8];
450
451 if (BasepOpc == ISD::FrameIndex) {
452 // Loading from a frame index is always properly aligned. Always.
453 return SDOperand();
454 }
455546
456547 // For an extending load of an i1 variable, just call it i8 (or whatever we
457548 // were passed) and make it zero-extended:
462553
463554 switch (LN->getAddressingMode()) {
464555 case ISD::UNINDEXED: {
465 SDOperand result;
466 SDOperand rot_op, rotamt;
467 SDOperand ptrp;
468 int c_offset;
469 int c_rotamt;
470
471 // The vector type we really want to be when we load the 16-byte chunk
472 MVT::ValueType vecVT, opVecVT;
556 int offset, rotamt;
557 bool was16aligned;
558 SDOperand result =
559 AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
560
561 if (result.Val == 0)
562 return result;
563
564 the_chain = result.getValue(1);
565 // Rotate the chunk if necessary
566 if (rotamt < 0)
567 rotamt += 16;
568 if (rotamt != 0) {
569 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
570
571 if (was16aligned) {
572 Ops[0] = the_chain;
573 Ops[1] = result;
574 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
575 } else {
576 LoadSDNode *LN1 = cast<LoadSDNode>(result);
577 Ops[0] = the_chain;
578 Ops[1] = result;
579 Ops[2] = LN1->getBasePtr();
580 }
581
582 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
583 the_chain = result.getValue(1);
584 }
585
586 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
587 SDVTList scalarvts;
588 MVT::ValueType vecVT = MVT::v16i8;
473589
474 vecVT = MVT::v16i8;
475 if (VT != MVT::i1)
476 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
477 opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
478
479 if (basep.getOpcode() == ISD::ADD) {
480 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
481
482 assert(CN != NULL
483 && "LowerLOAD: ISD::ADD operand 1 is not constant");
484
485 c_offset = (int) CN->getValue();
486 c_rotamt = (int) (c_offset & 0xf);
487
488 // Adjust the rotation amount to ensure that the final result ends up in
489 // the preferred slot:
490 c_rotamt -= vtm->prefslot_byte;
491 ptrp = basep.getOperand(0);
590 // Convert the loaded v16i8 vector to the appropriate vector type
591 // specified by the operand:
592 if (OpVT == VT) {
593 if (VT != MVT::i1)
594 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
595 } else
596 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
597
598 Ops[0] = the_chain;
599 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
600 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
601 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
602 the_chain = result.getValue(1);
492603 } else {
493 c_offset = 0;
494 c_rotamt = -vtm->prefslot_byte;
495 ptrp = basep;
496 }
497
498 if (alignment == 16) {
499 // 16-byte aligned load into preferred slot, no rotation
500 if (c_rotamt == 0) {
501 if (isMemoryOperand(ptrp))
502 // Return unchanged
503 return SDOperand();
504 else {
505 // Return modified D-Form address for pointer:
506 ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
507 ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
508 if (VT == OpVT)
509 return DAG.getLoad(VT, LN->getChain(), ptrp,
510 LN->getSrcValue(), LN->getSrcValueOffset(),
511 LN->isVolatile(), 16);
512 else
513 return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
514 LN->getSrcValueOffset(), OpVT,
515 LN->isVolatile(), 16);
516 }
604 // Handle the sign and zero-extending loads for i1 and i8:
605 unsigned NewOpC;
606
607 if (ExtType == ISD::SEXTLOAD) {
608 NewOpC = (OpVT == MVT::i1
609 ? SPUISD::EXTRACT_I1_SEXT
610 : SPUISD::EXTRACT_I8_SEXT);
517611 } else {
518 // Need to rotate...
519 if (c_rotamt < 0)
520 c_rotamt += 16;
521 // Realign the base pointer, with a D-Form address
522 if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
523 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
524 ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
525 else
526 basep = ptrp;
527
528 // Rotate the load:
529 rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
530 LN->getSrcValue(), LN->getSrcValueOffset(),
531 LN->isVolatile(), 16);
532 the_chain = rot_op.getValue(1);
533 rotamt = DAG.getConstant(c_rotamt, MVT::i16);
534
535 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
536 Ops[0] = the_chain;
537 Ops[1] = rot_op;
538 Ops[2] = rotamt;
539
540 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
541 the_chain = result.getValue(1);
542
543 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
544 SDVTList scalarvts;
545 Ops[0] = the_chain;
546 Ops[1] = result;
547 if (OpVT == VT) {
548 scalarvts = DAG.getVTList(VT, MVT::Other);
549 } else {
550 scalarvts = DAG.getVTList(OpVT, MVT::Other);
551 }
552
553 result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
554 result);
555 Ops[0] = the_chain;
556 Ops[1] = result;
557 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
558 the_chain = result.getValue(1);
559 } else {
560 // Handle the sign and zero-extending loads for i1 and i8:
561 unsigned NewOpC;
562
563 if (ExtType == ISD::SEXTLOAD) {
564 NewOpC = (OpVT == MVT::i1
565 ? SPUISD::EXTRACT_I1_SEXT
566 : SPUISD::EXTRACT_I8_SEXT);
567 } else {
568 assert(ExtType == ISD::ZEXTLOAD);
569 NewOpC = (OpVT == MVT::i1
570 ? SPUISD::EXTRACT_I1_ZEXT
571 : SPUISD::EXTRACT_I8_ZEXT);
572 }
573
574 result = DAG.getNode(NewOpC, OpVT, result);
575 }
576
577 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
578 SDOperand retops[2] = { result, the_chain };
579
580 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
581 return result;
582 /*UNREACHED*/
612 assert(ExtType == ISD::ZEXTLOAD);
613 NewOpC = (OpVT == MVT::i1
614 ? SPUISD::EXTRACT_I1_ZEXT
615 : SPUISD::EXTRACT_I8_ZEXT);
583616 }
584 } else {
585 // Misaligned 16-byte load:
586 if (basep.getOpcode() == ISD::LOAD) {
587 LN = cast<LoadSDNode>(basep);
588 if (LN->getAlignment() == 16) {
589 // We can verify that we're really loading from a 16-byte aligned
590 // chunk. Encapsulate basep as a D-Form address and return a new
591 // load:
592 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
593 DAG.getConstant(0, PtrVT));
594 if (OpVT == VT)
595 return DAG.getLoad(VT, LN->getChain(), basep,
596 LN->getSrcValue(), LN->getSrcValueOffset(),
597 LN->isVolatile(), 16);
598 else
599 return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
600 LN->getSrcValue(), LN->getSrcValueOffset(),
601 OpVT, LN->isVolatile(), 16);
602 }
603 }
604
605 // Catch all other cases where we can't guarantee that we have a
606 // 16-byte aligned entity, which means resorting to an X-form
607 // address scheme:
608
609 SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
610 SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
611 SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
612
613 ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
614
615 SDOperand alignLoad =
616 DAG.getLoad(opVecVT, LN->getChain(), ptrp,
617 LN->getSrcValue(), LN->getSrcValueOffset(),
618 LN->isVolatile(), 16);
619
620 SDOperand insertEltOp =
621 DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
622
623 result = DAG.getNode(SPUISD::SHUFB, opVecVT,
624 alignLoad,
625 alignLoad,
626 DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
627
628 result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
629
630 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
631 SDOperand retops[2] = { result, the_chain };
632
633 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
634 return result;
635 }
636 break;
617
618 result = DAG.getNode(NewOpC, OpVT, result);
619 }
620
621 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
622 SDOperand retops[2] = { result, the_chain };
623
624 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
625 return result;
637626 }
638627 case ISD::PRE_INC:
639628 case ISD::PRE_DEC:
663652 MVT::ValueType VT = Value.getValueType();
664653 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
665654 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
666 SDOperand the_chain = SN->getChain();
667 //unsigned alignment = SN->getAlignment();
668 //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
655 unsigned alignment = SN->getAlignment();
669656
670657 switch (SN->getAddressingMode()) {
671658 case ISD::UNINDEXED: {
672 SDOperand basep = SN->getBasePtr();
673 SDOperand ptrOp;
674 int offset;
675
676 if (basep.getOpcode() == ISD::FrameIndex) {
677 // FrameIndex nodes are always properly aligned. Really.
678 return SDOperand();
679 }
680
681 if (basep.getOpcode() == ISD::ADD) {
682 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
683 assert(CN != NULL
684 && "LowerSTORE: ISD::ADD operand 1 is not constant");
685 offset = unsigned(CN->getValue());
686 ptrOp = basep.getOperand(0);
687 DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
688 << offset
689 << "\n");
690 } else {
691 ptrOp = basep;
692 offset = 0;
693 }
659 int chunk_offset, slot_offset;
660 bool was16aligned;
694661
695662 // The vector type we really want to load from the 16-byte chunk, except
696663 // in the case of MVT::i1, which has to be v16i8.
697 unsigned vecVT, stVecVT;
698
664 unsigned vecVT, stVecVT = MVT::v16i8;
665
699666 if (StVT != MVT::i1)
700667 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
701 else
702 stVecVT = MVT::v16i8;
703668 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
704669
705 // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
706 // the actual dform addr offs($reg).
707 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
708 DAG.getConstant((offset & ~0xf), PtrVT));
709
710 // Create the 16-byte aligned vector load
711 SDOperand alignLoad =
712 DAG.getLoad(vecVT, the_chain, basep,
713 SN->getSrcValue(), SN->getSrcValueOffset(),
714 SN->isVolatile(), 16);
715 the_chain = alignLoad.getValue(1);
716
717 LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
670 SDOperand alignLoadVec =
671 AlignedLoad(Op, DAG, ST, SN, alignment,
672 chunk_offset, slot_offset, VT, was16aligned);
673
674 if (alignLoadVec.Val == 0)
675 return alignLoadVec;
676
677 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
678 SDOperand basePtr = LN->getBasePtr();
679 SDOperand the_chain = alignLoadVec.getValue(1);
718680 SDOperand theValue = SN->getValue();
719681 SDOperand result;
720682
726688 theValue = theValue.getOperand(0);
727689 }
728690
729 SDOperand insertEltOp =
730 DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
731 DAG.getNode(SPUISD::DFormAddr, PtrVT,
732 ptrOp,
733 DAG.getConstant((offset & 0xf), PtrVT)));
734
691 chunk_offset &= 0xf;
692 chunk_offset /= (MVT::getSizeInBits(StVT == MVT::i1 ? (unsigned) MVT::i8 : StVT) / 8);
693
694 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
695 SDOperand insertEltPtr;
696 SDOperand insertEltOp;
697
698 // If the base pointer is already a D-form address, then just create
699 // a new D-form address with a slot offset and the original base pointer.
700 // Otherwise generate a D-form address with the slot offset relative
701 // to the stack pointer, which is always aligned.
702 if (basePtr.getOpcode() == SPUISD::DFormAddr) {
703 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
704 basePtr.getOperand(0),
705 insertEltOffs);
706 } else {
707 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
708 DAG.getRegister(SPU::R1, PtrVT),
709 insertEltOffs);
710 }
711
712 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
735713 result = DAG.getNode(SPUISD::SHUFB, vecVT,
736714 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
737 alignLoad,
715 alignLoadVec,
738716 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
739717
740 result = DAG.getStore(the_chain, result, basep,
718 result = DAG.getStore(the_chain, result, basePtr,
741719 LN->getSrcValue(), LN->getSrcValueOffset(),
742720 LN->isVolatile(), LN->getAlignment());
743721
766744 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
767745 Constant *C = CP->getConstVal();
768746 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
747 SDOperand Zero = DAG.getConstant(0, PtrVT);
769748 const TargetMachine &TM = DAG.getTarget();
770 SDOperand Zero = DAG.getConstant(0, PtrVT);
771749
772750 if (TM.getRelocationModel() == Reloc::Static) {
773751 if (!ST->usingLargeMem()) {
774752 // Just return the SDOperand with the constant pool address in it.
775753 return CPI;
776754 } else {
755 #if 1
777756 // Generate hi/lo address pair
778757 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
779758 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
780759
781760 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
761 #else
762 return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
763 #endif
782764 }
783765 }
784766
796778 const TargetMachine &TM = DAG.getTarget();
797779
798780 if (TM.getRelocationModel() == Reloc::Static) {
799 if (!ST->usingLargeMem()) {
800 // Just return the SDOperand with the jump table address in it.
801 return JTI;
802 } else {
803 // Generate hi/lo address pair
804 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
805 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
806
807 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
808 }
781 return (!ST->usingLargeMem()
782 ? JTI
783 : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
809784 }
810785
811786 assert(0 &&
819794 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
820795 GlobalValue *GV = GSDN->getGlobal();
821796 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
797 const TargetMachine &TM = DAG.getTarget();
822798 SDOperand Zero = DAG.getConstant(0, PtrVT);
823 const TargetMachine &TM = DAG.getTarget();
824799
825800 if (TM.getRelocationModel() == Reloc::Static) {
826 if (!ST->usingLargeMem()) {
827 // Generate a local store address
828 return GA;
829 } else {
830 // Generate hi/lo address pair
831 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
832 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
833
834 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
835 }
801 return (!ST->usingLargeMem()
802 ? GA
803 : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
836804 } else {
837805 cerr << "LowerGlobalAddress: Relocation model other than static not "
838806 << "supported.\n";
10731041
10741042 static
10751043 SDOperand
1076 LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1044 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
10771045 SDOperand Chain = Op.getOperand(0);
10781046 #if 0
10791047 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
11831151 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
11841152 GlobalValue *GV = G->getGlobal();
11851153 unsigned CalleeVT = Callee.getValueType();
1186
1187 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1188 // style calls, otherwise, external symbols are BRASL calls.
1189 // NOTE:
1190 // This may be an unsafe assumption for JIT and really large compilation
1191 // units.
1192 if (GV->isDeclaration()) {
1193 Callee = DAG.getGlobalAddress(GV, CalleeVT);
1154 SDOperand Zero = DAG.getConstant(0, PtrVT);
1155 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1156
1157 if (!ST->usingLargeMem()) {
1158 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1159 // style calls; otherwise, external symbols are BRASL calls. This assumes
1160 // that declared/defined symbols are in the same compilation unit and can
1161 // be reached through PC-relative jumps.
1162 //
1163 // NOTE:
1164 // This may be an unsafe assumption for JIT and really large compilation
1165 // units.
1166 if (GV->isDeclaration()) {
1167 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1168 } else {
1169 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1170 }
11941171 } else {
1195 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1196 DAG.getTargetGlobalAddress(GV, CalleeVT),
1197 DAG.getConstant(0, PtrVT));
1172 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1173 // address pairs:
1174 Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
11981175 }
11991176 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
12001177 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1201 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1178 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
12021179 // If this is an absolute destination address that appears to be a legal
12031180 // local store address, use the munged value.
12041181 Callee = SDOperand(Dest, 0);
1182 }
12051183
12061184 Ops.push_back(Chain);
12071185 Ops.push_back(Callee);
24672445 case ISD::FORMAL_ARGUMENTS:
24682446 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
24692447 case ISD::CALL:
2470 return LowerCALL(Op, DAG);
2448 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
24712449 case ISD::RET:
24722450 return LowerRET(Op, DAG, getTargetMachine());
24732451
3030 Hi, ///< High address component (upper 16)
3131 Lo, ///< Low address component (lower 16)
3232 PCRelAddr, ///< Program counter relative address
33 AFormAddr, ///< A-form address (local store)
3334 DFormAddr, ///< D-Form address "imm($r)"
34 XFormAddr, ///< X-Form address "$r1($r2)"
35 XFormAddr, ///< X-Form address "$r($r)"
3536
3637 LDRESULT, ///< Load result (value, chain)
3738 CALL, ///< CALL instruction
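The three address forms above can be illustrated with a short sketch (editor's illustration; GA, BasePtr, and Offs stand for previously constructed operands, and each call mirrors a use in the lowering code earlier in this patch):

    SDOperand Zero = DAG.getConstant(0, PtrVT);

    // A-form: direct local-store address (small memory model), as built
    // for function calls in LowerCALL above.
    SDOperand AForm = DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);

    // D-form: "imm($r)" -- register base plus a small signed immediate,
    // as built by AlignedLoad when re-aligning a base pointer.
    SDOperand DForm = DAG.getNode(SPUISD::DFormAddr, PtrVT, BasePtr, Offs);

    // X-form: "$r($r)" -- used by the large memory model, where a full
    // 32-bit address is materialized as an ILHU/IOHL hi/lo pair.
    SDOperand XForm = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);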
157157 def LQAr32:
158158 RI16Form<0b100001100, (outs R32C:$rT), (ins addr256k:$src),
159159 "lqa\t$rT, $src", LoadStore,
160 [(set R32C:$rT, (load aform_addr:$src))]>;
160 [(set R32C:$rT, (load aform_addr:$src))]>;
161161
162162 def LQAf32:
163163 RI16Form<0b100001100, (outs R32FP:$rT), (ins addr256k:$src),
606606 RI16Form<0b100000110, (outs R32FP:$rT), (ins R32FP:$rS, f32imm:$val),
607607 "iohl\t$rT, $val", ImmLoad,
608608 [/* insert intrinsic here */]>,
609 RegConstraint<"$rS = $rT">,
610 NoEncode<"$rS">;
611
612 def IOHLlo:
613 RI16Form<0b100000110, (outs R32C:$rT), (ins R32C:$rS, symbolLo:$val),
614 "iohl\t$rT, $val", ImmLoad,
615 [/* no pattern */]>,
609616 RegConstraint<"$rS = $rT">,
610617 NoEncode<"$rS">;
611618
23662373 // are used here for type checking (instances where ROTQBI is used actually
23672374 // use vector registers)
23682375 def ROTQBYvec:
2369 RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
2376 RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
23702377 "rotqby\t$rT, $rA, $rB", RotateShift,
2371 [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R16C:$rB))]>;
2372
2373 def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R16C:$rB),
2374 (ROTQBYvec VECREG:$rA, R16C:$rB)>;
2378 [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R32C:$rB))]>;
2379
2380 def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R32C:$rB),
2381 (ROTQBYvec VECREG:$rA, R32C:$rB)>;
23752382
23762383 // See ROTQBY note above.
23772384 def ROTQBYIvec:
27192726 [/* no pattern to match: intrinsic */]>;
27202727
27212728 def CEQBIr8:
2722 RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm:$val),
2729 RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm_i8:$val),
27232730 "ceqbi\t$rT, $rA, $val", ByteOp,
27242731 [/* no pattern to match: intrinsic */]>;
27252732
27262733 def CEQBIv16i8:
2727 RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm:$val),
2734 RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm_i8:$val),
27282735 "ceqbi\t$rT, $rA, $val", ByteOp,
27292736 [/* no pattern to match: intrinsic */]>;
27302737
27922799 def BRASL:
27932800 BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
27942801 "brasl\t$$lr, $func",
2795 [(SPUcall tglobaladdr:$func)]>;
2802 [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
27962803
27972804 // Branch indirect and set link if external data. These instructions are not
27982805 // actually generated, matched by an intrinsic:
34673474 // low parts in order to load them into a register.
34683475 //===----------------------------------------------------------------------===//
34693476
3470 def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>;
3471 def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>;
3472 def : Pat<(SPUdform tglobaladdr:$in, imm:$imm), (ILAlsa tglobaladdr:$in)>;
3473 def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>;
3474 def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>;
3475 def : Pat<(SPUdform tconstpool:$in, imm:$imm), (ILAlsa tconstpool:$in)>;
3476 def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>;
3477 def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>;
3478 def : Pat<(SPUdform tjumptable:$in, imm:$imm), (ILAlsa tjumptable:$in)>;
3479
3480 // Force load of global address to a register. These forms show up in
3481 // SPUISD::DFormAddr pseudo instructions:
3482 def : Pat<(add tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
3483 def : Pat<(add tconstpool:$in, 0), (ILAlsa tglobaladdr:$in)>;
3484 def : Pat<(add tjumptable:$in, 0), (ILAlsa tglobaladdr:$in)>;
3477 def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>;
3478 def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>;
3479 def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
3480 def : Pat<(SPUxform tglobaladdr:$in, 0),
3481 (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
3482 def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>;
3483 def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>;
3484 def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
3485 def : Pat<(SPUxform tjumptable:$in, 0),
3486 (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
3487 def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>;
3488 def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>;
3489 def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
3490 /* def : Pat<(SPUxform tconstpool:$in, 0),
3491 (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; */
3492
34853493 // Intrinsics:
34863494 include "CellSDKIntrinsics.td"
185185 // PC-relative address
186186 def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
187187
188 // A-Form local store addresses
189 def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
190
188191 // D-Form "imm($reg)" addresses
189192 def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>;
193
194 // X-Form "$reg($reg)" addresses
195 def SPUxform : SDNode<"SPUISD::XFormAddr", SDTIntBinOp, []>;
190196
191197 // SPU 32-bit sign-extension to 64-bits
192198 def SPUsext32_to_64: SDNode<"SPUISD::SEXT32TO64", SDTIntExtendOp, []>;
139139 return ((Value & ((1 << 19) - 1)) == Value);
140140 }]>;
141141
142 def lo16 : PatLeaf<(imm), [{
143 // lo16 predicate - returns true if the immediate fits entirely within the
144 // low order 16 bits of a 32-bit constant:
145 if (N->getValueType(0) == MVT::i32) {
146 uint32_t val = N->getValue();
147 return ((val & 0x0000ffff) == val);
148 }
149
150 return false;
151 }], LO16>;
152
142153 def hi16 : PatLeaf<(imm), [{
143154 // hi16 predicate - returns true if the immediate has all zeros in the
144155 // low order bits and is a 32-bit constant:
410421 //===----------------------------------------------------------------------===//
411422 // Operand Definitions.
412423
413 def s7imm: Operand<i16> {
424 def s7imm: Operand<i8> {
425 let PrintMethod = "printS7ImmOperand";
426 }
427
428 def s7imm_i8: Operand<i8> {
414429 let PrintMethod = "printS7ImmOperand";
415430 }
416431
33 ; RUN: grep andi %t1.s | count 36
44 ; RUN: grep andhi %t1.s | count 30
55 ; RUN: grep andbi %t1.s | count 4
6 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
7 target triple = "spu"
68
79 ; AND instruction generation:
810 define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep bisl %t1.s | count 6 &&
2 ; RUN: grep ila %t1.s | count 1 &&
3 ; RUN: grep rotqbyi %t1.s | count 4 &&
4 ; RUN: grep lqa %t1.s | count 4 &&
5 ; RUN: grep lqd %t1.s | count 6 &&
6 ; RUN: grep dispatch_tab %t1.s | count 10
7 ; ModuleID = 'call_indirect.bc'
8 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
9 target triple = "spu-unknown-elf"
10
11 @dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16
12
13 define void @dispatcher(i32 %i_arg, float %f_arg) {
14 entry:
15 %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16
16 tail call void %tmp2( i32 %i_arg, float %f_arg )
17 %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4
18 tail call void %tmp2.1( i32 %i_arg, float %f_arg )
19 %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4
20 tail call void %tmp2.2( i32 %i_arg, float %f_arg )
21 %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4
22 tail call void %tmp2.3( i32 %i_arg, float %f_arg )
23 %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4
24 tail call void %tmp2.4( i32 %i_arg, float %f_arg )
25 %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4
26 tail call void %tmp2.5( i32 %i_arg, float %f_arg )
27 ret void
28 }
22 ; RUN: grep andi %t1.s | count 3 &&
33 ; RUN: grep rotmi %t1.s | count 2 &&
44 ; RUN: grep rothmi %t1.s | count 1
5 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
6 target triple = "spu"
57
68 declare i32 @llvm.ctpop.i8(i8)
79 declare i32 @llvm.ctpop.i16(i16)
66 ; RUN: grep dfnms %t1.s | count 4
77 ;
88 ; This file includes double precision floating point arithmetic instructions
9 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
10 target triple = "spu"
911
1012 define double @fadd(double %arg1, double %arg2) {
1113 %A = add double %arg1, %arg2
99 ; Alternatively, a ^ ~b, which the compiler will also match.
1010
1111 ; ModuleID = 'eqv.bc'
12 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
13 target triple = "spu"
1214
1315 define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
1416 %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
44 ; RUN: grep lqx %t2.s | count 27 &&
55 ; RUN: grep space %t1.s | count 8 &&
66 ; RUN: grep byte %t1.s | count 424
7 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
8 target triple = "spu"
79
810 define i32 @i32_extract_0(<4 x i32> %v) {
911 entry:
22 ; RUN: grep fcmeq %t1.s | count 1
33 ;
44 ; This file includes standard floating point arithmetic instructions
5 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
6 target triple = "spu"
57
68 declare double @fabs(double)
79 declare float @fabsf(float)
55 ; RUN: grep fnms %t1.s | count 2
66 ;
77 ; This file includes standard floating point arithmetic instructions
8 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
9 target triple = "spu"
810
911 define float @fdiv32(float %arg1, float %arg2) {
1012 %A = fdiv float %arg1, %arg2
33 ; RUN: grep xor %t1.s | count 4 &&
44 ; RUN: grep and %t1.s | count 5 &&
55 ; RUN: grep andbi %t1.s | count 3
6 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
7 target triple = "spu"
68
79 define double @fneg_dp(double %X) {
810 %Y = sub double -0.000000e+00, %X
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
11 ; RUN: grep "ilh" %t1.s | count 5
2 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
3 target triple = "spu"
24
35 define i16 @test_1() {
46 %x = alloca i16, align 16
1111 ; RUN: grep 49077 %t1.s | count 1 &&
1212 ; RUN: grep 1267 %t1.s | count 2 &&
1313 ; RUN: grep 16309 %t1.s | count 1
14 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
15 target triple = "spu"
1416
1517 define i32 @test_1() {
1618 ret i32 4784128 ;; ILHU via pattern (0x49000)
99 ; RUN: grep 192 %t1.s | count 32 &&
1010 ; RUN: grep 128 %t1.s | count 30 &&
1111 ; RUN: grep 224 %t1.s | count 2
12
13 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
14 target triple = "spu"
1215
1316 ; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202)
1417 ; 18446744073709551591 => 0x ffffffff ffffffe7 (-25)
55 ; RUN: grep and %t1.s | count 2 &&
66 ; RUN: grep andi %t1.s | count 1 &&
77 ; RUN: grep ila %t1.s | count 1
8
9 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
10 target triple = "spu"
811
912 define float @sitofp_i32(i32 %arg1) {
1013 %A = sitofp i32 %arg1 to float ; [#uses=1]
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep ceq %t1.s | count 30 &&
2 ; RUN: grep ceqb %t1.s | count 10 &&
3 ; RUN: grep ceqhi %t1.s | count 5 &&
4 ; RUN: grep ceqi %t1.s | count 5 &&
5 ; RUN: grep cgt %t1.s | count 30 &&
6 ; RUN: grep cgtb %t1.s | count 10 &&
7 ; RUN: grep cgthi %t1.s | count 5 &&
8 ; RUN: grep cgti %t1.s | count 5
9 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
10 target triple = "spu"
11
12 declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
13
14 declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>)
15 declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>)
16 declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>)
17 declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16)
18 declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16)
19 declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8)
20
21 declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>)
22 declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>)
23 declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>)
24 declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16)
25 declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16)
26 declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8)
27
28 declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>)
29 declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>)
30 declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>)
31 declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16)
32 declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16)
33 declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8)
34
35
36
37 define <4 x i32> @test(<4 x i32> %A) {
38 call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
39 %Y = bitcast <4 x i32> %1 to <4 x i32>
40 ret <4 x i32> %Y
41 }
42
43 define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) {
44 call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B)
45 %Y = bitcast <4 x i32> %1 to <4 x i32>
46 ret <4 x i32> %Y
47 }
48
49 define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) {
50 call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B)
51 %Y = bitcast <8 x i16> %1 to <8 x i16>
52 ret <8 x i16> %Y
53 }
54
55 define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) {
56 call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B)
57 %Y = bitcast <16 x i8> %1 to <16 x i8>
58 ret <16 x i8> %Y
59 }
60
61 define <4 x i32> @ceqitest(<4 x i32> %A) {
62 call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65)
63 %Y = bitcast <4 x i32> %1 to <4 x i32>
64 ret <4 x i32> %Y
65 }
66
67 define <8 x i16> @ceqhitest(<8 x i16> %A) {
68 call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65)
69 %Y = bitcast <8 x i16> %1 to <8 x i16>
70 ret <8 x i16> %Y
71 }
72
73 define <16 x i8> @ceqbitest(<16 x i8> %A) {
74 call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65)
75 %Y = bitcast <16 x i8> %1 to <16 x i8>
76 ret <16 x i8> %Y
77 }
78
79 define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) {
80 call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B)
81 %Y = bitcast <4 x i32> %1 to <4 x i32>
82 ret <4 x i32> %Y
83 }
84
85 define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) {
86 call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B)
87 %Y = bitcast <8 x i16> %1 to <8 x i16>
88 ret <8 x i16> %Y
89 }
90
91 define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) {
92 call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B)
93 %Y = bitcast <16 x i8> %1 to <16 x i8>
94 ret <16 x i8> %Y
95 }
96
97 define <4 x i32> @cgtitest(<4 x i32> %A) {
98 call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65)
99 %Y = bitcast <4 x i32> %1 to <4 x i32>
100 ret <4 x i32> %Y
101 }
102
103 define <8 x i16> @cgthitest(<8 x i16> %A) {
104 call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65)
105 %Y = bitcast <8 x i16> %1 to <8 x i16>
106 ret <8 x i16> %Y
107 }
108
109 define <16 x i8> @cgtbitest(<16 x i8> %A) {
110 call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65)
111 %Y = bitcast <16 x i8> %1 to <16 x i8>
112 ret <16 x i8> %Y
113 }
114
115 define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) {
116 call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B)
117 %Y = bitcast <4 x i32> %1 to <4 x i32>
118 ret <4 x i32> %Y
119 }
120
121 define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) {
122 call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B)
123 %Y = bitcast <8 x i16> %1 to <8 x i16>
124 ret <8 x i16> %Y
125 }
126
127 define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) {
128 call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B)
129 %Y = bitcast <16 x i8> %1 to <16 x i8>
130 ret <16 x i8> %Y
131 }
132
133 define <4 x i32> @clgtitest(<4 x i32> %A) {
134 call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65)
135 %Y = bitcast <4 x i32> %1 to <4 x i32>
136 ret <4 x i32> %Y
137 }
138
139 define <8 x i16> @clgthitest(<8 x i16> %A) {
140 call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65)
141 %Y = bitcast <8 x i16> %1 to <8 x i16>
142 ret <8 x i16> %Y
143 }
144
145 define <16 x i8> @clgtbitest(<16 x i8> %A) {
146 call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65)
147 %Y = bitcast <16 x i8> %1 to <16 x i8>
148 ret <16 x i8> %Y
149 }
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep fa %t1.s | count 5 &&
2 ; RUN: grep fs %t1.s | count 5 &&
3 ; RUN: grep fm %t1.s | count 15 &&
4 ; RUN: grep fceq %t1.s | count 5 &&
5 ; RUN: grep fcmeq %t1.s | count 5 &&
6 ; RUN: grep fcgt %t1.s | count 5 &&
7 ; RUN: grep fcmgt %t1.s | count 5 &&
8 ; RUN: grep fma %t1.s | count 5 &&
9 ; RUN: grep fnms %t1.s | count 5 &&
10 ; RUN: grep fms %t1.s | count 5
11 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
12 target triple = "spu"
13
14 declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
15
16 declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>)
17 declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>)
18 declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>)
19
20 declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>)
21 declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>)
22 declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>)
23 declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>)
24
25 declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>)
26 declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>)
27 declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>)
28
29 define <4 x i32> @test(<4 x i32> %A) {
30 call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
31 %Y = bitcast <4 x i32> %1 to <4 x i32>
32 ret <4 x i32> %Y
33 }
34
35 define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) {
36 call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B)
37 %Y = bitcast <4 x float> %1 to <4 x float>
38 ret <4 x float> %Y
39 }
40
41 define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) {
42 call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B)
43 %Y = bitcast <4 x float> %1 to <4 x float>
44 ret <4 x float> %Y
45 }
46
47 define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) {
48 call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B)
49 %Y = bitcast <4 x float> %1 to <4 x float>
50 ret <4 x float> %Y
51 }
52
53 define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) {
54 call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B)
55 %Y = bitcast <4 x float> %1 to <4 x float>
56 ret <4 x float> %Y
57 }
58
59 define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) {
60 call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B)
61 %Y = bitcast <4 x float> %1 to <4 x float>
62 ret <4 x float> %Y
63 }
64
65 define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) {
66 call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B)
67 %Y = bitcast <4 x float> %1 to <4 x float>
68 ret <4 x float> %Y
69 }
70
71 define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) {
72 call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B)
73 %Y = bitcast <4 x float> %1 to <4 x float>
74 ret <4 x float> %Y
75 }
76
77 define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
78 call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C)
79 %Y = bitcast <4 x float> %1 to <4 x float>
80 ret <4 x float> %Y
81 }
82
83 define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
84 call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
85 %Y = bitcast <4 x float> %1 to <4 x float>
86 ret <4 x float> %Y
87 }
88
89 define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
90 call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
91 %Y = bitcast <4 x float> %1 to <4 x float>
92 ret <4 x float> %Y
93 }
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep and %t1.s | count 20 &&
2 ; RUN: grep andc %t1.s | count 5
3 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
4 target triple = "spu"
5
6 declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>)
7 declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>)
8 declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16)
9 declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16)
10 declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8)
11
12 declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>)
13 declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>)
14 declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16)
15 declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16)
16 declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8)
17
18 declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>)
19 declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16)
20 declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16)
21 declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8)
22
23 declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>)
24 declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>)
25
26 define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) {
27 call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B)
28 %Y = bitcast <4 x i32> %1 to <4 x i32>
29 ret <4 x i32> %Y
30 }
31
32 define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) {
33 call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B)
34 %Y = bitcast <4 x i32> %1 to <4 x i32>
35 ret <4 x i32> %Y
36 }
37
38 define <4 x i32> @anditest(<4 x i32> %A) {
39 call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65)
40 %Y = bitcast <4 x i32> %1 to <4 x i32>
41 ret <4 x i32> %Y
42 }
43
44 define <8 x i16> @andhitest(<8 x i16> %A) {
45 call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65)
46 %Y = bitcast <8 x i16> %1 to <8 x i16>
47 ret <8 x i16> %Y
48 }
22 ; RUN: grep and %t1.s | count 94
33 ; RUN: grep xsbh %t1.s | count 2
44 ; RUN: grep xshw %t1.s | count 4
5 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
6 target triple = "spu"
57
68 define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
79 %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1]
33 ; RUN: grep ori %t1.s | count 30
44 ; RUN: grep orhi %t1.s | count 30
55 ; RUN: grep orbi %t1.s | count 15
6 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
7 target triple = "spu"
68
79 ; OR instruction generation:
810 define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
77 ; RUN: grep rothi.*,.3 %t1.s | count 1
88 ; RUN: grep andhi %t1.s | count 4
99 ; RUN: grep shlhi %t1.s | count 4
10 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
11 target triple = "spu"
1012
1113 ; Vector rotates are not currently supported in gcc or llvm assembly. These are
1214 ; not tested.
22 ; RUN: grep and %t1.s | count 2
33 ; RUN: grep xsbh %t1.s | count 1
44 ; RUN: grep xshw %t1.s | count 2
5 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
6 target triple = "spu"
57
68 define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
79 %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
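The xor with an all-ones vector above builds the complement of %arg3, the mask operand of a select-bits idiom. A scalar model of selb's semantics, a sketch from the SPU ISA rather than code in this test:

#include <cassert>
#include <cstdint>

// selb(a, b, mask): each result bit comes from b where the mask bit is 1
// and from a where it is 0 -- the pattern the vector xor/and/or sequence
// in selb_v16i8_1 is expected to match.
static uint32_t selb(uint32_t a, uint32_t b, uint32_t mask) {
  return (a & ~mask) | (b & mask);
}

int main() {
  assert(selb(0x00ff00ffu, 0xffffffffu, 0x0000ffffu) == 0x00ffffffu);
  return 0;
}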
44 ; RUN: grep shli %t1.s | count 51
55 ; RUN: grep xshw %t1.s | count 5
66 ; RUN: grep and %t1.s | count 5
7 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
8 target triple = "spu"
79
810 ; Vector shifts are not currently supported in gcc or llvm assembly. These are
911 ; not tested.
77 ;
88 ; This file includes standard floating point arithmetic instructions
99 ; NOTE: fdiv is tested separately since it is a compound operation
10 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
11 target triple = "spu"
1012
1113 define float @fp_add(float %arg1, float %arg2) {
1214 %A = add float %arg1, %arg2 ; [#uses=1]
0 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
1 ; RUN: grep lqa %t1.s | count 10 &&
2 ; RUN: grep lqd %t1.s | count 2 &&
3 ; RUN: grep rotqbyi %t1.s | count 5 &&
4 ; RUN: grep xshw %t1.s | count 1 &&
5 ; RUN: grep andi %t1.s | count 4 &&
6 ; RUN: grep cbd %t1.s | count 3 &&
7 ; RUN: grep chd %t1.s | count 1 &&
8 ; RUN: grep cwd %t1.s | count 1 &&
9 ; RUN: grep shufb %t1.s | count 5 &&
10 ; RUN: grep stqa %t1.s | count 5
11 ; ModuleID = 'struct_1.bc'
12 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
13 target triple = "spu"
14
15 ; struct hackstate {
16 ; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3)
17 ; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3)
18 ; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3)
19 ; int i1; // offset 4 (rotate left by 4 bytes to byte 0)
20 ; short s1; // offset 8 (rotate left by 6 bytes to byte 2)
21 ; int i2; // offset 12 [ignored]
22 ; unsigned char c4; // offset 16 [ignored]
23 ; unsigned char c5; // offset 17 [ignored]
24 ; unsigned char c6; // offset 18 [ignored]
25 ; unsigned char c7; // offset 19 (no rotate, in preferred slot)
26 ; int i3; // offset 20 [ignored]
27 ; int i4; // offset 24 [ignored]
28 ; int i5; // offset 28 [ignored]
29 ; int i6; // offset 32 (no rotate, in preferred slot)
30 ; }
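The rotate amounts in the comment above all follow one rule: rotqbyi rotates the quadword left, and the count is (element offset - preferred slot) mod 16, with the preferred scalar slot at byte 3 for chars, byte 2 for shorts, and byte 0 for ints. (On the store side, the cbd/chd/cwd and shufb counts in the RUN lines correspond to inserting the scalar into its quadword before the stqa.) A small check of this inferred rule against the commented offsets, not part of the test:

#include <cassert>

// Rotate count implied by the struct comment: bring the element at `offset`
// (within its 16-byte quadword) into the preferred scalar slot.
static unsigned rotCount(unsigned offset, unsigned preferredSlot) {
  return (offset - preferredSlot) & 15;   // mod-16 rotate-left count
}

int main() {
  assert(rotCount(0, 3) == 13);  // c1: char, preferred slot is byte 3
  assert(rotCount(1, 3) == 14);  // c2
  assert(rotCount(2, 3) == 15);  // c3
  assert(rotCount(4, 0) == 4);   // i1: int, preferred slot is byte 0
  assert(rotCount(8, 2) == 6);   // s1: short, preferred slot is byte 2
  return 0;
}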
31 %struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 }
32
33 ; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
34 @state = global %struct.hackstate zeroinitializer, align 16
35
36 define i8 @get_hackstate_c1() zeroext {
37 entry:
38 %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
39 ret i8 %tmp2
40 }
41
42 define i8 @get_hackstate_c2() zeroext {
43 entry:
44 %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
45 ret i8 %tmp2
46 }
47
48 define i8 @get_hackstate_c3() zeroext {
49 entry:
50 %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
51 ret i8 %tmp2
52 }
53
54 define i32 @get_hackstate_i1() {
55 entry:
56 %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
57 ret i32 %tmp2
58 }
59
60 define i16 @get_hackstate_s1() signext {
61 entry:
62 %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
63 ret i16 %tmp2
64 }
65
66 define i8 @get_hackstate_c7() zeroext {
67 entry:
68 %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16
69 ret i8 %tmp2
70 }
71
72 define i32 @get_hackstate_i6() zeroext {
73 entry:
74 %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
75 ret i32 %tmp2
76 }
77
78 define void @set_hackstate_c1(i8 zeroext %c) {
79 entry:
80 store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
81 ret void
82 }
83
84 define void @set_hackstate_c2(i8 zeroext %c) {
85 entry:
86 store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
87 ret void
88 }
89
90 define void @set_hackstate_c3(i8 zeroext %c) {
91 entry:
92 store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
93 ret void
94 }
95
96 define void @set_hackstate_i1(i32 %i) {
97 entry:
98 store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
99 ret void
100 }
101
102 define void @set_hackstate_s1(i16 signext %s) {
103 entry:
104 store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
105 ret void
106 }