llvm.org GIT mirror llvm / a0792de
- Add TargetInstrInfo::getOperandLatency() to compute operand latencies. This allows targets to correctly compute latency for cases where static scheduling itineraries aren't sufficient, e.g. variable_ops instructions such as ARM::ldm. This also allows targets without scheduling itineraries to compute operand latencies, e.g. X86 can return (approximated) latencies for high latency instructions such as division. - Compute operand latencies for those defined by load multiple instructions, e.g. ldm, and those used by store multiple instructions, e.g. stm. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115755 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 9 years ago
15 changed file(s) with 354 addition(s) and 68 deletion(s). Raw diff Collapse all Expand all
607607 /// instruction will be decoded to on the target cpu.
608608 virtual unsigned getNumMicroOps(const MachineInstr *MI,
609609 const InstrItineraryData *ItinData) const;
610
611 /// getOperandLatency - Compute and return the use operand latency of a given
612 /// itinerary class and operand index if the value is produced by an
613 /// instruction of the specified itinerary class and def operand index.
614 /// In most cases, the static scheduling itinerary is enough to determine the
615 /// operand latency. But it may not be possible for instructions with a variable
616 /// number of defs / uses.
617 virtual
618 int getOperandLatency(const InstrItineraryData *ItinData,
619 const MachineInstr *DefMI, unsigned DefIdx,
620 const MachineInstr *UseMI, unsigned UseIdx) const;
621
622 virtual
623 int getOperandLatency(const InstrItineraryData *ItinData,
624 SDNode *DefNode, unsigned DefIdx,
625 SDNode *UseNode, unsigned UseIdx) const;
610626 };
611627
612628 /// TargetInstrInfoImpl - This is the default implementation of
526526 MachineInstr *DefMI = Def->getInstr();
527527 int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
528528 if (DefIdx != -1) {
529 unsigned DefClass = DefMI->getDesc().getSchedClass();
529 const TargetInstrDesc &DefTID = DefMI->getDesc();
530 unsigned DefClass = DefTID.getSchedClass();
531
530532 MachineInstr *UseMI = Use->getInstr();
531 unsigned UseClass = UseMI->getDesc().getSchedClass();
532
533533 // For all uses of the register, calculate the maximum latency
534534 int Latency = -1;
535535 for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
540540 if (MOReg != Reg)
541541 continue;
542542
543 int UseCycle = InstrItins->getOperandLatency(DefClass, DefIdx,
544 UseClass, i);
543 int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, UseMI, i);
545544 Latency = std::max(Latency, UseCycle);
546545
547546 // If we found a latency, then replace the existing dependence latency.
449449 if (ForceUnitLatencies())
450450 return;
451451
452 if (!InstrItins || InstrItins->isEmpty())
453 return;
454
455452 if (dep.getKind() != SDep::Data)
456453 return;
457454
458455 unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
459 if (!Def->isMachineOpcode())
460 return;
461
462 const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
463 if (DefIdx >= II.getNumDefs())
464 return;
465
466 int Latency = 0;
467 if (!Use->isMachineOpcode()) {
468 Latency = InstrItins->getOperandCycle(II.getSchedClass(), DefIdx);
469 } else {
470 unsigned DefClass = II.getSchedClass();
471 unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
472 Latency = InstrItins->getOperandLatency(DefClass, DefIdx, UseClass, OpIdx);
473 }
474
456 int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
475457 if (Latency >= 0)
476458 dep.setLatency(Latency);
477459 }
16411641 }
16421642 }
16431643 }
1644
1645 int
1646 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
1647 const TargetInstrDesc &DefTID,
1648 unsigned DefIdx, unsigned DefAlign,
1649 const TargetInstrDesc &UseTID,
1650 unsigned UseIdx, unsigned UseAlign) const {
1651 unsigned DefClass = DefTID.getSchedClass();
1652 unsigned UseClass = UseTID.getSchedClass();
1653
1654 if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
1655 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
1656
1657 // This may be a def / use of a variable_ops instruction, the operand
1658 // latency might be determinable dynamically. Let the target try to
1659 // figure it out.
1660 bool LdmBypass = false;
1661 int DefCycle = -1;
1662 switch (DefTID.getOpcode()) {
1663 default:
1664 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
1665 break;
1666 case ARM::LDM_RET:
1667 case ARM::LDM:
1668 case ARM::LDM_UPD:
1669 case ARM::tLDM:
1670 case ARM::tLDM_UPD:
1671 case ARM::tPUSH:
1672 case ARM::t2LDM_RET:
1673 case ARM::t2LDM:
1674 case ARM::t2LDM_UPD: {
1675 LdmBypass = 1;
1676 unsigned RegNo = (DefIdx+1) - DefTID.getNumOperands() + 1;
1677 if (Subtarget.isCortexA8()) {
1678 // 4 registers would be issued: 1, 2, 1.
1679 // 5 registers would be issued: 1, 2, 2.
1680 DefCycle = RegNo / 2;
1681 if (DefCycle < 1)
1682 DefCycle = 1;
1683 // Result latency is issue cycle + 2: E2.
1684 DefCycle += 2;
1685 } else if (Subtarget.isCortexA9()) {
1686 DefCycle = (RegNo / 2);
1687 // If there are odd number of registers or if it's not 64-bit aligned,
1688 // then it takes an extra AGU (Address Generation Unit) cycle.
1689 if ((RegNo % 2) || DefAlign < 8)
1690 ++DefCycle;
1691 // Result latency is AGU cycles + 2.
1692 DefCycle += 2;
1693 } else {
1694 // Assume the worst.
1695 DefCycle = RegNo + 2;
1696 }
1697 }
1698 }
1699
1700 if (DefCycle == -1)
1701 // We can't seem to determine the result latency of the def, assume it's 2.
1702 DefCycle = 2;
1703
1704 int UseCycle = -1;
1705 switch (UseTID.getOpcode()) {
1706 default:
1707 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
1708 break;
1709 case ARM::STM:
1710 case ARM::STM_UPD:
1711 case ARM::tSTM_UPD:
1712 case ARM::tPOP_RET:
1713 case ARM::tPOP:
1714 case ARM::t2STM:
1715 case ARM::t2STM_UPD: {
1716 unsigned RegNo = UseIdx - UseTID.getNumOperands() + 1;
1717 if (Subtarget.isCortexA8()) {
1718 // 4 registers would be issued: 1, 2, 1.
1719 // 5 registers would be issued: 1, 2, 2.
1720 UseCycle = RegNo / 2;
1721 if (UseCycle < 2)
1722 UseCycle = 2;
1723 // Result latency is issue cycle + 2: E2.
1724 UseCycle += 2;
1725 } else if (Subtarget.isCortexA9()) {
1726 UseCycle = (RegNo / 2);
1727 // If there are odd number of registers or if it's not 64-bit aligned,
1728 // then it takes an extra AGU (Address Generation Unit) cycle.
1729 if ((RegNo % 2) || UseAlign < 8)
1730 ++UseCycle;
1731 // Result latency is AGU cycles + 2.
1732 UseCycle += 2;
1733 } else {
1734 // Assume the worst.
1735 UseCycle = RegNo + 2;
1736 }
1737 }
1738 }
1739
1740 if (UseCycle == -1)
1741 // Assume it's read in the first stage.
1742 UseCycle = 1;
1743
1744 UseCycle = DefCycle - UseCycle + 1;
1745 if (UseCycle > 0) {
1746 if (LdmBypass) {
1747 // It's a variable_ops instruction so we can't use DefIdx here. Just use
1748 // first def operand.
1749 if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
1750 UseClass, UseIdx))
1751 --UseCycle;
1752 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
1753 UseClass, UseIdx))
1754 --UseCycle;
1755 }
1756
1757 return UseCycle;
1758 }
1759
1760 int
1761 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
1762 const MachineInstr *DefMI, unsigned DefIdx,
1763 const MachineInstr *UseMI, unsigned UseIdx) const {
1764 if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
1765 DefMI->isRegSequence() || DefMI->isImplicitDef())
1766 return 1;
1767
1768 const TargetInstrDesc &DefTID = DefMI->getDesc();
1769 if (!ItinData || ItinData->isEmpty())
1770 return DefTID.mayLoad() ? 3 : 1;
1771
1772 const TargetInstrDesc &UseTID = UseMI->getDesc();
1773 unsigned DefAlign = DefMI->hasOneMemOperand()
1774 ? (*DefMI->memoperands_begin())->getAlignment() : 0;
1775 unsigned UseAlign = UseMI->hasOneMemOperand()
1776 ? (*UseMI->memoperands_begin())->getAlignment() : 0;
1777 return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
1778 UseTID, UseIdx, UseAlign);
1779 }
1780
1781 int
1782 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
1783 SDNode *DefNode, unsigned DefIdx,
1784 SDNode *UseNode, unsigned UseIdx) const {
1785 if (!DefNode->isMachineOpcode())
1786 return 1;
1787
1788 const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
1789 if (!ItinData || ItinData->isEmpty())
1790 return DefTID.mayLoad() ? 3 : 1;
1791
1792 if (!UseNode->isMachineOpcode())
1793 return ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
1794
1795 const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
1796 const MachineSDNode *DefMN = dyn_cast(DefNode);
1797 unsigned DefAlign = !DefMN->memoperands_empty()
1798 ? (*DefMN->memoperands_begin())->getAlignment() : 0;
1799 const MachineSDNode *UseMN = dyn_cast(UseNode);
1800 unsigned UseAlign = !UseMN->memoperands_empty()
1801 ? (*UseMN->memoperands_begin())->getAlignment() : 0;
1802 return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
1803 UseTID, UseIdx, UseAlign);
1804 }
339339
340340 virtual unsigned getNumMicroOps(const MachineInstr *MI,
341341 const InstrItineraryData *ItinData) const;
342
343 virtual
344 int getOperandLatency(const InstrItineraryData *ItinData,
345 const MachineInstr *DefMI, unsigned DefIdx,
346 const MachineInstr *UseMI, unsigned UseIdx) const;
347 virtual
348 int getOperandLatency(const InstrItineraryData *ItinData,
349 SDNode *DefNode, unsigned DefIdx,
350 SDNode *UseNode, unsigned UseIdx) const;
351 private:
352 int getOperandLatency(const InstrItineraryData *ItinData,
353 const TargetInstrDesc &DefTID,
354 unsigned DefIdx, unsigned DefAlign,
355 const TargetInstrDesc &UseTID,
356 unsigned UseIdx, unsigned UseAlign) const;
342357 };
343358
344359 static inline
953953 hasExtraDefRegAllocReq = 1 in
954954 def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
955955 reglist:$dsts, variable_ops),
956 IndexModeUpd, LdStMulFrm, IIC_iLoadmBr,
956 IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
957957 "ldm${addr:submode}${p}\t$addr!, $dsts",
958958 "$addr.addr = $wb", []>;
959959
14621462 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
14631463 def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
14641464 reglist:$dsts, variable_ops),
1465 IndexModeNone, LdStMulFrm, IIC_iLoadm,
1465 IndexModeNone, LdStMulFrm, IIC_iLoad_m,
14661466 "ldm${addr:submode}${p}\t$addr, $dsts", "", []>;
14671467
14681468 def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
14691469 reglist:$dsts, variable_ops),
1470 IndexModeUpd, LdStMulFrm, IIC_iLoadm,
1470 IndexModeUpd, LdStMulFrm, IIC_iLoad_mu,
14711471 "ldm${addr:submode}${p}\t$addr!, $dsts",
14721472 "$addr.addr = $wb", []>;
14731473 } // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
14751475 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
14761476 def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
14771477 reglist:$srcs, variable_ops),
1478 IndexModeNone, LdStMulFrm, IIC_iStorem,
1478 IndexModeNone, LdStMulFrm, IIC_iStore_m,
14791479 "stm${addr:submode}${p}\t$addr, $srcs", "", []>;
14801480
14811481 def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
14821482 reglist:$srcs, variable_ops),
1483 IndexModeUpd, LdStMulFrm, IIC_iStorem,
1483 IndexModeUpd, LdStMulFrm, IIC_iStore_mu,
14841484 "stm${addr:submode}${p}\t$addr!, $srcs",
14851485 "$addr.addr = $wb", []>;
14861486 } // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
279279 let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
280280 hasExtraDefRegAllocReq = 1 in
281281 def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
282 IIC_iLoadmBr,
282 IIC_iPop_Br,
283283 "pop${p}\t$dsts", []>,
284284 T1Misc<{1,1,0,?,?,?,?}>;
285285
534534 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
535535 def tLDM : T1I<(outs),
536536 (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
537 IIC_iLoadm,
537 IIC_iLoad_m,
538538 "ldm${addr:submode}${p}\t$addr, $dsts", []>,
539539 T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
540540
541541 def tLDM_UPD : T1It<(outs tGPR:$wb),
542542 (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
543 IIC_iLoadm,
543 IIC_iLoad_m,
544544 "ldm${addr:submode}${p}\t$addr!, $dsts",
545545 "$addr.addr = $wb", []>,
546546 T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
549549 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
550550 def tSTM_UPD : T1It<(outs tGPR:$wb),
551551 (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
552 IIC_iStorem,
552 IIC_iStore_mu,
553553 "stm${addr:submode}${p}\t$addr!, $srcs",
554554 "$addr.addr = $wb", []>,
555555 T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
556556
557557 let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
558 def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_iLoadmBr,
558 def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
559 IIC_iPop,
559560 "pop${p}\t$dsts", []>,
560561 T1Misc<{1,1,0,?,?,?,?}>;
561562
562563 let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
563 def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_iStorem,
564 def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops),
565 IIC_iStore_m,
564566 "push${p}\t$srcs", []>,
565567 T1Misc<{0,1,0,?,?,?,?}>;
566568
12421242
12431243 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
12441244 def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
1245 reglist:$dsts, variable_ops), IIC_iLoadm,
1245 reglist:$dsts, variable_ops), IIC_iLoad_m,
12461246 "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
12471247 let Inst{31-27} = 0b11101;
12481248 let Inst{26-25} = 0b00;
12531253 }
12541254
12551255 def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
1256 reglist:$dsts, variable_ops), IIC_iLoadm,
1256 reglist:$dsts, variable_ops),
1257 IIC_iLoad_mu,
12571258 "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
12581259 "$addr.addr = $wb", []> {
12591260 let Inst{31-27} = 0b11101;
12671268
12681269 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
12691270 def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
1270 reglist:$srcs, variable_ops), IIC_iStorem,
1271 reglist:$srcs, variable_ops), IIC_iStore_m,
12711272 "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
12721273 let Inst{31-27} = 0b11101;
12731274 let Inst{26-25} = 0b00;
12791280
12801281 def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
12811282 reglist:$srcs, variable_ops),
1282 IIC_iStorem,
1283 IIC_iStore_m,
12831284 "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs",
12841285 "$addr.addr = $wb", []> {
12851286 let Inst{31-27} = 0b11101;
24722473 hasExtraDefRegAllocReq = 1 in
24732474 def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
24742475 reglist:$dsts, variable_ops),
2475 IIC_iLoadmBr,
2476 IIC_iLoad_mBr,
24762477 "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
24772478 "$addr.addr = $wb", []> {
24782479 let Inst{31-27} = 0b11101;
6666 def IIC_iLoad_d_i : InstrItinClass;
6767 def IIC_iLoad_d_r : InstrItinClass;
6868 def IIC_iLoad_d_ru : InstrItinClass;
69 def IIC_iLoadm : InstrItinClass<0>; // micro-coded
70 def IIC_iLoadmBr : InstrItinClass<0>; // micro-coded
69 def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
70 def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
71 def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
72 def IIC_iPop : InstrItinClass<0>; // micro-coded
73 def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
7174 def IIC_iLoadiALU : InstrItinClass;
7275 def IIC_iStore_i : InstrItinClass;
7376 def IIC_iStore_r : InstrItinClass;
8487 def IIC_iStore_d_i : InstrItinClass;
8588 def IIC_iStore_d_r : InstrItinClass;
8689 def IIC_iStore_d_ru : InstrItinClass;
87 def IIC_iStorem : InstrItinClass<0>; // micro-coded
90 def IIC_iStore_m : InstrItinClass<0>; // micro-coded
91 def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
8892 def IIC_Br : InstrItinClass;
8993 def IIC_fpSTAT : InstrItinClass;
9094 def IIC_fpUNA32 : InstrItinClass;
171171 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
172172 InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
173173 //
174 // Load multiple
175 InstrItinData,
174 // Load multiple, def is the 5th operand.
175 InstrItinData,
176176 InstrStage<2, [A8_Pipe0], 0>,
177177 InstrStage<2, [A8_Pipe1]>,
178178 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
179 InstrStage<1, [A8_LdSt0]>]>,
180
179 InstrStage<1, [A8_LdSt0]>], [1, 1, 1, 1, 3]>,
180 //
181 // Load multiple + update, defs are the 1st and 5th operands.
182 InstrItinData,
183 InstrStage<2, [A8_Pipe0], 0>,
184 InstrStage<2, [A8_Pipe1]>,
185 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
186 InstrStage<1, [A8_LdSt0]>], [2, 1, 1, 1, 3]>,
181187 //
182188 // Load multiple plus branch
183 InstrItinDatamBr , [InstrStage<2, [A8_Issue], 0>,
189 InstrItinData_mBr, [InstrStage<2, [A8_Issue], 0>,
184190 InstrStage<2, [A8_Pipe0], 0>,
185191 InstrStage<2, [A8_Pipe1]>,
186192 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
187193 InstrStage<1, [A8_LdSt0]>,
188 InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
194 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
195 [1, 2, 1, 1, 3]>,
196 //
197 // Pop, def is the 3rd operand.
198 InstrItinData,
199 InstrStage<2, [A8_Pipe0], 0>,
200 InstrStage<2, [A8_Pipe1]>,
201 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
202 InstrStage<1, [A8_LdSt0]>], [1, 1, 3]>,
203 //
204 // Pop + branch, def is the 3rd operand.
205 InstrItinData,
206 InstrStage<2, [A8_Pipe0], 0>,
207 InstrStage<2, [A8_Pipe1]>,
208 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
209 InstrStage<1, [A8_LdSt0]>,
210 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
211 [1, 1, 3]>,
189212
190213 //
191214 // iLoadi + iALUr for t2LDRpci_pic.
265288 InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
266289 //
267290 // Store multiple
268 InstrItinDatam , [InstrStage<2, [A8_Issue], 0>,
291 InstrItinData_m , [InstrStage<2, [A8_Issue], 0>,
269292 InstrStage<2, [A8_Pipe0], 0>,
270293 InstrStage<2, [A8_Pipe1]>,
271294 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
272295 InstrStage<1, [A8_LdSt0]>]>,
296 //
297 // Store multiple + update
298 InstrItinData,
299 InstrStage<2, [A8_Pipe0], 0>,
300 InstrStage<2, [A8_Pipe1]>,
301 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
302 InstrStage<1, [A8_LdSt0]>], [2]>,
273303
274304 // Branch
275305 //
241241 InstrStage<2, [A9_AGU]>],
242242 [5, 4, 1, 1], [A9_LdBypass]>,
243243 //
244 // Load multiple
245 InstrItinData,
244 // Load multiple, def is the 5th operand.
245 InstrItinData,
246246 InstrStage<1, [A9_MUX0], 0>,
247247 InstrStage<2, [A9_AGU]>],
248 [3], [A9_LdBypass]>,
249
248 [1, 1, 1, 1, 3],
249 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
250 //
251 // Load multiple + update, defs are the 1st and 5th operands.
252 InstrItinData,
253 InstrStage<1, [A9_MUX0], 0>,
254 InstrStage<2, [A9_AGU]>],
255 [2, 1, 1, 1, 3],
256 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
250257 //
251258 // Load multiple plus branch
252 InstrItinDatamBr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
259 InstrItinData_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
253260 InstrStage<1, [A9_MUX0], 0>,
254261 InstrStage<1, [A9_AGU]>,
255 InstrStage<1, [A9_Branch]>]>,
262 InstrStage<1, [A9_Branch]>],
263 [1, 2, 1, 1, 3],
264 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
265 //
266 // Pop, def is the 3rd operand.
267 InstrItinData,
268 InstrStage<1, [A9_MUX0], 0>,
269 InstrStage<2, [A9_AGU]>],
270 [1, 1, 3],
271 [NoBypass, NoBypass, A9_LdBypass]>,
272 //
273 // Pop + branch, def is the 3rd operand.
274 InstrItinData,
275 InstrStage<1, [A9_MUX0], 0>,
276 InstrStage<2, [A9_AGU]>,
277 InstrStage<1, [A9_Branch]>],
278 [1, 1, 3],
279 [NoBypass, NoBypass, A9_LdBypass]>,
256280
257281 //
258282 // iLoadi + iALUr for t2LDRpci_pic.
328352 [3, 1, 1, 1]>,
329353 //
330354 // Store multiple
331 InstrItinDatam , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355 InstrItinData_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
332356 InstrStage<1, [A9_MUX0], 0>,
333357 InstrStage<1, [A9_AGU]>]>,
358 //
359 // Store multiple + update
360 InstrItinData,
361 InstrStage<1, [A9_MUX0], 0>,
362 InstrStage<1, [A9_AGU]>], [2]>,
363
334364 // Branch
335365 //
336366 // no delay slots, so the latency of a branch is unimportant
337 InstrItinDataBranch]>]>,
367 InstrItinDataIssue0], 0>,
368 InstrStage<1, [A9_Issue1], 0>,
369 InstrStage<1, [A9_Branch]>]>,
338370
339371 // VFP and NEON shares the same register file. This means that every VFP
340372 // instruction should wait for full completion of the consecutive NEON
115115 InstrItinData], [5, 2, 2, 1]>,
116116
117117 //
118 // Load multiple
119 InstrItinData]>,
120
118 // Load multiple, def is the 5th operand.
119 InstrItinData], [1, 1, 1, 1, 4]>,
120 //
121 // Load multiple + update, defs are the 1st and 5th operands.
122 InstrItinData], [2, 1, 1, 1, 4]>,
121123 //
122124 // Load multiple plus branch
123 InstrItinData,
124 InstrStage<1, [V6_Pipe]>]>,
125 InstrItinData]>,
126 InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
125127
126128 //
127129 // iLoadi + iALUr for t2LDRpci_pic.
128130 InstrItinData,
129131 InstrStage<1, [V6_Pipe]>], [3, 1]>,
132
133 //
134 // Pop, def is the 3rd operand.
135 InstrItinData], [1, 1, 4]>,
136 //
137 // Pop + branch, def is the 3rd operand.
138 InstrItinData,
139 InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
130140
131141 // Integer store pipeline
132142 //
158168 InstrItinData], [2, 2, 2, 1]>,
159169 //
160170 // Store multiple
161 InstrItinDatam , [InstrStage<3, [V6_Pipe]>]>,
171 InstrItinData_m , [InstrStage<3, [V6_Pipe]>]>,
172 //
173 // Store multiple + update
174 InstrItinData], [2]>,
162175
163176 // Branch
164177 //
1111 //===----------------------------------------------------------------------===//
1212
1313 #include "llvm/Target/TargetInstrInfo.h"
14 #include "llvm/MC/MCAsmInfo.h"
1514 #include "llvm/Target/TargetInstrItineraries.h"
1615 #include "llvm/Target/TargetRegisterInfo.h"
16 #include "llvm/CodeGen/SelectionDAGNodes.h"
17 #include "llvm/MC/MCAsmInfo.h"
1718 #include "llvm/Support/ErrorHandling.h"
1819 using namespace llvm;
1920
6263 // override this function to return the right number.
6364 return 1;
6465 }
66
67 int
68 TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
69 const MachineInstr *DefMI, unsigned DefIdx,
70 const MachineInstr *UseMI, unsigned UseIdx) const {
71 if (!ItinData || ItinData->isEmpty())
72 return -1;
73
74 unsigned DefClass = DefMI->getDesc().getSchedClass();
75 unsigned UseClass = UseMI->getDesc().getSchedClass();
76 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
77 }
78
79 int
80 TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
81 SDNode *DefNode, unsigned DefIdx,
82 SDNode *UseNode, unsigned UseIdx) const {
83 if (!ItinData || ItinData->isEmpty())
84 return -1;
85
86 if (!DefNode->isMachineOpcode())
87 return -1;
88
89 unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
90 if (!UseNode->isMachineOpcode())
91 return ItinData->getOperandCycle(DefClass, DefIdx);
92 unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
93 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
94 }
95
6596
6697 /// insertNoop - Insert a noop into the instruction stream at the specified
6798 /// point.
1212 ; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi.
1313 define i32 @f2() nounwind optsize {
1414 ; ELF: f2:
15 ; ELF: mov r0, #128
16 ; ELF: str r0, [sp]
15 ; ELF: mov [[REGISTER:(r[0-9]+)]], #128
16 ; ELF: str [[REGISTER]], [sp]
1717 ; DARWIN: f2:
1818 ; DARWIN: mov r3, #128
1919 entry:
7878 ; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123
7979 ; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0
8080 ; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]]
81 ; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
8281 ; CHECK-NEON-NEXT: it eq
8382 ; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4
83 ; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
8484 ; CHECK-NEON-NEXT: ldr
8585 ; CHECK-NEON: bx
8686