llvm.org GIT mirror llvm / d9031ef
Revert "In visitSTORE, always use FindBetterChain, rather than only when UseAA is enabled." This reverts commit r293184 which is failing in LTO builds git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293188 91177308-0d34-0410-b5e6-96231b3b80d8 Nirav Dave 3 years ago
71 changed file(s) with 3834 addition(s) and 3677 deletion(s). Raw diff Collapse all Expand all
361361 unsigned AddrSpace) const {
362362 return false;
363363 }
364
365 /// Returns if it's reasonable to merge stores to MemVT size.
366 virtual bool canMergeStoresTo(EVT MemVT) const { return true; }
367364
368365 /// \brief Return true if it is cheap to speculate a call to intrinsic cttz.
369366 virtual bool isCheapToSpeculateCttz() const {
5151 STATISTIC(SlicedLoads, "Number of load sliced");
5252
5353 namespace {
54 static cl::opt
55 CombinerAA("combiner-alias-analysis", cl::Hidden,
56 cl::desc("Enable DAG combiner alias-analysis heuristics"));
57
5458 static cl::opt
5559 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
5660 cl::desc("Enable DAG combiner's use of IR alias analysis"));
412416 /// Holds a pointer to an LSBaseSDNode as well as information on where it
413417 /// is located in a sequence of memory operations connected by a chain.
414418 struct MemOpLink {
415 MemOpLink(LSBaseSDNode *N, int64_t Offset)
416 : MemNode(N), OffsetFromBase(Offset) {}
419 MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
420 MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
417421 // Ptr to the mem node.
418422 LSBaseSDNode *MemNode;
419423 // Offset from the base ptr.
420424 int64_t OffsetFromBase;
425 // What is the sequence number of this mem node.
426 // Lowest mem operand in the DAG starts at zero.
427 unsigned SequenceNum;
421428 };
422429
423430 /// This is a helper function for visitMUL to check the profitability
428435 SDValue &AddNode,
429436 SDValue &ConstNode);
430437
438 /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
439 /// constant build_vector of the stored constant values in Stores.
440 SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
441 ArrayRef Stores,
442 SmallVectorImpl &Chains,
443 EVT Ty) const;
431444
432445 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
433446 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
441454 /// This is a helper function for MergeConsecutiveStores. When the source
442455 /// elements of the consecutive stores are all constants or all extracted
443456 /// vector elements, try to merge them into one larger store.
444 /// \return True if a merged store was created.
445 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl &StoreNodes,
446 EVT MemVT, unsigned NumStores,
447 bool IsConstantSrc, bool UseVector);
457 /// \return number of stores that were merged into a merged store (always
458 /// a prefix of \p StoreNode).
459 bool MergeStoresOfConstantsOrVecElts(
460 SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumStores,
461 bool IsConstantSrc, bool UseVector);
448462
449463 /// This is a helper function for MergeConsecutiveStores.
450464 /// Stores that may be merged are placed in StoreNodes.
451 void getStoreMergeCandidates(StoreSDNode *St,
452 SmallVectorImpl &StoreNodes);
465 /// Loads that may alias with those stores are placed in AliasLoadNodes.
466 void getStoreMergeAndAliasCandidates(
467 StoreSDNode* St, SmallVectorImpl &StoreNodes,
468 SmallVectorImpl &AliasLoadNodes);
453469
454470 /// Helper function for MergeConsecutiveStores. Checks if
455471 /// Candidate stores have indirect dependency through their
461477 /// This optimization uses wide integers or vectors when possible.
462478 /// \return number of stores that were merged into a merged store (the
463479 /// affected nodes are stored as a prefix in \p StoreNodes).
464 bool MergeConsecutiveStores(StoreSDNode *N);
480 bool MergeConsecutiveStores(StoreSDNode *N,
481 SmallVectorImpl &StoreNodes);
465482
466483 /// \brief Try to transform a truncation where C is a constant:
467484 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
15641581 }
15651582
15661583 SmallVector TFs; // List of token factors to visit.
1567 SmallVector Ops; // Ops for replacing token factor.
1584 SmallVector Ops; // Ops for replacing token factor.
15681585 SmallPtrSet SeenOps;
15691586 bool Changed = false; // If we should replace this token factor.
15701587
16081625 }
16091626 }
16101627
1611 // Remove Nodes that are chained to another node in the list. Do so
1612 // by walking up chains breadth-first, stopping when we've seen
1613 // another operand. In general we must climb to the EntryNode, but we can exit
1614 // early if we find all remaining work is associated with just one operand as
1615 // no further pruning is possible.
1616
1617 // List of nodes to search through and original Ops from which they originate.
1618 SmallVector, 8> Worklist;
1619 SmallVector OpWorkCount; // Count of work for each Op.
1620 SmallPtrSet SeenChains;
1621 bool DidPruneOps = false;
1622
1623 unsigned NumLeftToConsider = 0;
1624 for (const SDValue &Op : Ops) {
1625 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1626 OpWorkCount.push_back(1);
1627 }
1628
1629 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1630 // If this is an Op, we can remove the op from the list. Remark any
1631 // search associated with it as from the current OpNumber.
1632 if (SeenOps.count(Op) != 0) {
1633 Changed = true;
1634 DidPruneOps = true;
1635 unsigned OrigOpNumber = 0;
1636 while (Ops[OrigOpNumber].getNode() != Op && OrigOpNumber < Ops.size())
1637 OrigOpNumber++;
1638 assert((OrigOpNumber != Ops.size()) &&
1639 "expected to find TokenFactor Operand");
1640 // Re-mark worklist from OrigOpNumber to OpNumber
1641 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1642 if (Worklist[i].second == OrigOpNumber) {
1643 Worklist[i].second = OpNumber;
1644 }
1645 }
1646 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1647 OpWorkCount[OrigOpNumber] = 0;
1648 NumLeftToConsider--;
1649 }
1650 // Add if it's a new chain
1651 if (SeenChains.insert(Op).second) {
1652 OpWorkCount[OpNumber]++;
1653 Worklist.push_back(std::make_pair(Op, OpNumber));
1654 }
1655 };
1656
1657 for (unsigned i = 0; i < Worklist.size(); ++i) {
1658 // We need to be considering at least 2 Ops to prune.
1659 if (NumLeftToConsider <= 1)
1660 break;
1661 auto CurNode = Worklist[i].first;
1662 auto CurOpNumber = Worklist[i].second;
1663 assert((OpWorkCount[CurOpNumber] > 0) &&
1664 "Node should not appear in worklist");
1665 switch (CurNode->getOpcode()) {
1666 case ISD::EntryToken:
1667 // Hitting EntryToken is the only way for the search to terminate without
1668 // hitting
1669 // another operand's search. Prevent us from marking this operand
1670 // considered.
1671 NumLeftToConsider++;
1672 break;
1673 case ISD::TokenFactor:
1674 for (const SDValue &Op : CurNode->op_values())
1675 AddToWorklist(i, Op.getNode(), CurOpNumber);
1676 break;
1677 default:
1678 if (auto *MemNode = dyn_cast(CurNode))
1679 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1680 break;
1681 }
1682 OpWorkCount[CurOpNumber]--;
1683 if (OpWorkCount[CurOpNumber] == 0)
1684 NumLeftToConsider--;
1685 }
1686
16871628 SDValue Result;
16881629
16891630 // If we've changed things around then replace token factor.
16921633 // The entry token is the only possible outcome.
16931634 Result = DAG.getEntryNode();
16941635 } else {
1695 if (DidPruneOps) {
1696 SmallVector PrunedOps;
1697 //
1698 for (const SDValue &Op : Ops) {
1699 if (SeenChains.count(Op.getNode()) == 0)
1700 PrunedOps.push_back(Op);
1701 }
1702 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1703 } else {
1704 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1705 }
1706 }
1707
1708 // Add users to worklist, since we may introduce a lot of new
1709 // chained token factors while removing memory deps.
1710 return CombineTo(N, Result, true /*add to worklist*/);
1636 // New and improved token factor.
1637 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1638 }
1639
1640 // Add users to worklist if AA is enabled, since it may introduce
1641 // a lot of new chained token factors while removing memory deps.
1642 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
1643 : DAG.getSubtarget().useAA();
1644 return CombineTo(N, Result, UseAA /*add to worklist*/);
17111645 }
17121646
17131647 return Result;
66056539 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
66066540 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
66076541
6608 // Simplify TF.
6609 AddToWorklist(NewChain.getNode());
6610
66116542 CombineTo(N, NewValue);
66126543
66136544 // Replace uses of the original load (before extension)
1074810679 // TODO: Handle TRUNCSTORE/LOADEXT
1074910680 if (OptLevel != CodeGenOpt::None &&
1075010681 ISD::isNormalLoad(N) && !LD->isVolatile()) {
10751 // We can forward a direct store or a store off of a tokenfactor.
10752 if (Chain->getOpcode() == ISD::TokenFactor) {
10753 // If we find a potential match AND the only nodes are COPYFROMREG we can
10754 // forward.
10755 bool CanForward = true;
10756 StoreSDNode *ForwardSt = nullptr;
10757 for (const SDValue &ChainOp : Chain->op_values()) {
10758 switch (ChainOp.getNode()->getOpcode()) {
10759 case ISD::CopyToReg:
10760 case ISD::CopyFromReg:
10761 break;
10762 default:
10763 if (!ForwardSt && ISD::isNON_TRUNCStore(ChainOp.getNode())) {
10764 StoreSDNode *St = cast(ChainOp);
10765 if (St->getBasePtr() == Ptr &&
10766 St->getValue().getValueType() == N->getValueType(0)) {
10767 ForwardSt = St;
10768 continue;
10769 }
10770 }
10771 CanForward = false;
10772 }
10773 }
10774 if (CanForward && ForwardSt)
10775 return CombineTo(N, ForwardSt->getOperand(1), Chain);
10776 } else if (ISD::isNON_TRUNCStore(Chain.getNode())) {
10682 if (ISD::isNON_TRUNCStore(Chain.getNode())) {
1077710683 StoreSDNode *PrevST = cast(Chain);
1077810684 if (PrevST->getBasePtr() == Ptr &&
1077910685 PrevST->getValue().getValueType() == N->getValueType(0))
10780 return CombineTo(N, PrevST->getOperand(1), Chain);
10686 return CombineTo(N, Chain.getOperand(1), Chain);
1078110687 }
1078210688 }
1078310689
1079510701 }
1079610702 }
1079710703
10798 if (LD->isUnindexed()) {
10704 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
10705 : DAG.getSubtarget().useAA();
10706 #ifndef NDEBUG
10707 if (CombinerAAOnlyFunc.getNumOccurrences() &&
10708 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
10709 UseAA = false;
10710 #endif
10711 if (UseAA && LD->isUnindexed()) {
1079910712 // Walk up chain skipping non-aliasing memory nodes.
1080010713 SDValue BetterChain = FindBetterChain(N, Chain);
1080110714
1137711290 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
1137811291 ArgChains);
1137911292 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11380 AddToWorklist(Chain.getNode());
1138111293 return true;
1138211294 }
1138311295
1177111683 return false;
1177211684 }
1177311685
11686 SDValue DAGCombiner::getMergedConstantVectorStore(
11687 SelectionDAG &DAG, const SDLoc &SL, ArrayRef Stores,
11688 SmallVectorImpl &Chains, EVT Ty) const {
11689 SmallVector BuildVector;
11690
11691 for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
11692 StoreSDNode *St = cast(Stores[I].MemNode);
11693 Chains.push_back(St->getChain());
11694 BuildVector.push_back(St->getValue());
11695 }
11696
11697 return DAG.getBuildVector(Ty, SL, BuildVector);
11698 }
11699
1177411700 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
1177511701 SmallVectorImpl &StoreNodes, EVT MemVT,
1177611702 unsigned NumStores, bool IsConstantSrc, bool UseVector) {
1177911705 return false;
1178011706
1178111707 int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
11708 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
11709 unsigned LatestNodeUsed = 0;
11710
11711 for (unsigned i=0; i < NumStores; ++i) {
11712 // Find a chain for the new wide-store operand. Notice that some
11713 // of the store nodes that we found may not be selected for inclusion
11714 // in the wide store. The chain we use needs to be the chain of the
11715 // latest store node which is *used* and replaced by the wide store.
11716 if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
11717 LatestNodeUsed = i;
11718 }
11719
11720 SmallVector Chains;
1178211721
1178311722 // The latest Node in the DAG.
11723 LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
1178411724 SDLoc DL(StoreNodes[0].MemNode);
1178511725
1178611726 SDValue StoredVal;
1179611736 assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
1179711737
1179811738 if (IsConstantSrc) {
11799 SmallVector BuildVector;
11800 for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
11801 StoreSDNode *St = cast(StoreNodes[I].MemNode);
11802 SDValue Val = St->getValue();
11803 if (MemVT.getScalarType().isInteger())
11804 if (auto *CFP = dyn_cast(St->getValue()))
11805 Val = DAG.getConstant(
11806 (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
11807 SDLoc(CFP), MemVT);
11808 BuildVector.push_back(Val);
11809 }
11810 StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
11739 StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
1181111740 } else {
1181211741 SmallVector Ops;
1181311742 for (unsigned i = 0; i < NumStores; ++i) {
1181711746 if (Val.getValueType() != MemVT)
1181811747 return false;
1181911748 Ops.push_back(Val);
11749 Chains.push_back(St->getChain());
1182011750 }
1182111751
1182211752 // Build the extracted vector elements back into a vector.
1183611766 for (unsigned i = 0; i < NumStores; ++i) {
1183711767 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
1183811768 StoreSDNode *St = cast(StoreNodes[Idx].MemNode);
11769 Chains.push_back(St->getChain());
1183911770
1184011771 SDValue Val = St->getValue();
1184111772 StoreInt <<= ElementSizeBytes * 8;
1185311784 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
1185411785 }
1185511786
11856 SmallVector Chains;
11857
11858 // Gather all Chains we're inheriting. As generally all chains are
11859 // equal, do minor check to remove obvious redundancies.
11860 Chains.push_back(StoreNodes[0].MemNode->getChain());
11861 for (unsigned i = 1; i < NumStores; ++i)
11862 if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain())
11863 Chains.push_back(StoreNodes[i].MemNode->getChain());
11864
11865 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
11787 assert(!Chains.empty());
11788
1186611789 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1186711790 SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
1186811791 FirstInChain->getBasePtr(),
1186911792 FirstInChain->getPointerInfo(),
1187011793 FirstInChain->getAlignment());
1187111794
11872 // Replace all merged stores with the new store.
11873 for (unsigned i = 0; i < NumStores; ++i)
11874 CombineTo(StoreNodes[i].MemNode, NewStore);
11875
11876 AddToWorklist(NewChain.getNode());
11795 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11796 : DAG.getSubtarget().useAA();
11797 if (UseAA) {
11798 // Replace all merged stores with the new store.
11799 for (unsigned i = 0; i < NumStores; ++i)
11800 CombineTo(StoreNodes[i].MemNode, NewStore);
11801 } else {
11802 // Replace the last store with the new store.
11803 CombineTo(LatestOp, NewStore);
11804 // Erase all other stores.
11805 for (unsigned i = 0; i < NumStores; ++i) {
11806 if (StoreNodes[i].MemNode == LatestOp)
11807 continue;
11808 StoreSDNode *St = cast(StoreNodes[i].MemNode);
11809 // ReplaceAllUsesWith will replace all uses that existed when it was
11810 // called, but graph optimizations may cause new ones to appear. For
11811 // example, the case in pr14333 looks like
11812 //
11813 // St's chain -> St -> another store -> X
11814 //
11815 // And the only difference from St to the other store is the chain.
11816 // When we change its chain to be St's chain they become identical,
11817 // get CSEed and the net result is that X is now a use of St.
11818 // Since we know that St is redundant, just iterate.
11819 while (!St->use_empty())
11820 DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
11821 deleteAndRecombine(St);
11822 }
11823 }
11824
11825 StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
1187711826 return true;
1187811827 }
1187911828
11880 void DAGCombiner::getStoreMergeCandidates(
11881 StoreSDNode *St, SmallVectorImpl &StoreNodes) {
11829 void DAGCombiner::getStoreMergeAndAliasCandidates(
11830 StoreSDNode* St, SmallVectorImpl &StoreNodes,
11831 SmallVectorImpl &AliasLoadNodes) {
1188211832 // This holds the base pointer, index, and the offset in bytes from the base
1188311833 // pointer.
1188411834 BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
11885 EVT MemVT = St->getMemoryVT();
1188611835
1188711836 // We must have a base and an offset.
1188811837 if (!BasePtr.Base.getNode())
1189211841 if (BasePtr.Base.isUndef())
1189311842 return;
1189411843
11895 // We are looking for a root node which is an ancestor to all mergeable
11896 // stores. We search up through a load, to our root, and then down
11897 // through all children. For instance, we will find Store{1,2,3} if
11898 // St is Store1, Store2, or Store3, where the root is not a load,
11899 // which is always true for nonvolatile ops. TODO: Expand
11900 // the search to find all valid candidates through multiple layers of loads.
11901 //
11902 // Root
11903 // |-------|-------|
11904 // Load Load Store3
11905 // | |
11906 // Store1 Store2
11907 //
11908 // FIXME: We should be able to climb and
11909 // descend TokenFactors to find candidates as well.
11910
11911 SDNode *RootNode = (St->getChain()).getNode();
11912
11913 // Set of Parents of Candidates
11914 std::set CandidateParents;
11915
11916 if (LoadSDNode *Ldn = dyn_cast(RootNode)) {
11917 RootNode = Ldn->getChain().getNode();
11918 for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
11919 if (I.getOperandNo() == 0 && isa(*I)) // walk down chain
11920 CandidateParents.insert(*I);
11921 } else
11922 CandidateParents.insert(RootNode);
11923
11924 bool IsLoadSrc = isa(St->getValue());
11925 bool IsConstantSrc = isa(St->getValue()) ||
11926 isa(St->getValue());
11927 bool IsExtractVecSrc =
11928 (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
11929 St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
11930 auto CorrectValueKind = [&](StoreSDNode *Other) -> bool {
11931 if (IsLoadSrc)
11932 return isa(Other->getValue());
11933 if (IsConstantSrc)
11934 return (isa(Other->getValue()) ||
11935 isa(Other->getValue()));
11936 if (IsExtractVecSrc)
11937 return (Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
11938 Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
11939 return false;
11940 };
11941
11942 // check all parents of mergeable children
11943 for (auto P = CandidateParents.begin(); P != CandidateParents.end(); ++P)
11944 for (auto I = (*P)->use_begin(), E = (*P)->use_end(); I != E; ++I)
11945 if (I.getOperandNo() == 0)
11946 if (StoreSDNode *OtherST = dyn_cast(*I)) {
11947 if (OtherST->isVolatile() || OtherST->isIndexed())
11948 continue;
11949 // We can merge constant floats to equivalent integers
11950 if (OtherST->getMemoryVT() != MemVT)
11951 if (!(MemVT.isInteger() && MemVT.bitsEq(OtherST->getMemoryVT()) &&
11952 isa(OtherST->getValue())))
11953 continue;
11954 BaseIndexOffset Ptr =
11955 BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
11956 if (Ptr.equalBaseIndex(BasePtr) && CorrectValueKind(OtherST))
11957 StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
11844 // Walk up the chain and look for nodes with offsets from the same
11845 // base pointer. Stop when reaching an instruction with a different kind
11846 // or instruction which has a different base pointer.
11847 EVT MemVT = St->getMemoryVT();
11848 unsigned Seq = 0;
11849 StoreSDNode *Index = St;
11850
11851
11852 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11853 : DAG.getSubtarget().useAA();
11854
11855 if (UseAA) {
11856 // Look at other users of the same chain. Stores on the same chain do not
11857 // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
11858 // to be on the same chain, so don't bother looking at adjacent chains.
11859
11860 SDValue Chain = St->getChain();
11861 for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
11862 if (StoreSDNode *OtherST = dyn_cast(*I)) {
11863 if (I.getOperandNo() != 0)
11864 continue;
11865
11866 if (OtherST->isVolatile() || OtherST->isIndexed())
11867 continue;
11868
11869 if (OtherST->getMemoryVT() != MemVT)
11870 continue;
11871
11872 BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
11873
11874 if (Ptr.equalBaseIndex(BasePtr))
11875 StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
11876 }
11877 }
11878
11879 return;
11880 }
11881
11882 while (Index) {
11883 // If the chain has more than one use, then we can't reorder the mem ops.
11884 if (Index != St && !SDValue(Index, 0)->hasOneUse())
11885 break;
11886
11887 // Find the base pointer and offset for this memory node.
11888 BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
11889
11890 // Check that the base pointer is the same as the original one.
11891 if (!Ptr.equalBaseIndex(BasePtr))
11892 break;
11893
11894 // The memory operands must not be volatile.
11895 if (Index->isVolatile() || Index->isIndexed())
11896 break;
11897
11898 // No truncation.
11899 if (Index->isTruncatingStore())
11900 break;
11901
11902 // The stored memory type must be the same.
11903 if (Index->getMemoryVT() != MemVT)
11904 break;
11905
11906 // We do not allow under-aligned stores in order to prevent
11907 // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
11908 // be irrelevant here; what MATTERS is that we not move memory
11909 // operations that potentially overlap past each-other.
11910 if (Index->getAlignment() < MemVT.getStoreSize())
11911 break;
11912
11913 // We found a potential memory operand to merge.
11914 StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
11915
11916 // Find the next memory operand in the chain. If the next operand in the
11917 // chain is a store then move up and continue the scan with the next
11918 // memory operand. If the next operand is a load save it and use alias
11919 // information to check if it interferes with anything.
11920 SDNode *NextInChain = Index->getChain().getNode();
11921 while (1) {
11922 if (StoreSDNode *STn = dyn_cast(NextInChain)) {
11923 // We found a store node. Use it for the next iteration.
11924 Index = STn;
11925 break;
11926 } else if (LoadSDNode *Ldn = dyn_cast(NextInChain)) {
11927 if (Ldn->isVolatile()) {
11928 Index = nullptr;
11929 break;
1195811930 }
11931
11932 // Save the load node for later. Continue the scan.
11933 AliasLoadNodes.push_back(Ldn);
11934 NextInChain = Ldn->getChain().getNode();
11935 continue;
11936 } else {
11937 Index = nullptr;
11938 break;
11939 }
11940 }
11941 }
1195911942 }
1196011943
1196111944 // We need to check that merging these stores does not cause a loop
1198211965 return true;
1198311966 }
1198411967
11985 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
11968 bool DAGCombiner::MergeConsecutiveStores(
11969 StoreSDNode* St, SmallVectorImpl &StoreNodes) {
1198611970 if (OptLevel == CodeGenOpt::None)
1198711971 return false;
1198811972
1201612000 if (MemVT.isVector() && IsLoadSrc)
1201712001 return false;
1201812002
12019 SmallVector StoreNodes;
12020 // Find potential store merge candidates by searching through chain sub-DAG
12021 getStoreMergeCandidates(St, StoreNodes);
12003 // Only look at ends of store sequences.
12004 SDValue Chain = SDValue(St, 0);
12005 if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
12006 return false;
12007
12008 // Save the LoadSDNodes that we find in the chain.
12009 // We need to make sure that these nodes do not interfere with
12010 // any of the store nodes.
12011 SmallVector AliasLoadNodes;
12012
12013 getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
1202212014
1202312015 // Check if there is anything to merge.
1202412016 if (StoreNodes.size() < 2)
1202512017 return false;
1202612018
12027 // Check that we can merge these candidates without causing a cycle
12028 if (!checkMergeStoreCandidatesForDependencies(StoreNodes))
12019 // only do dependence check in AA case
12020 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
12021 : DAG.getSubtarget().useAA();
12022 if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
1202912023 return false;
1203012024
1203112025 // Sort the memory operands according to their distance from the
12032 // base pointer.
12026 // base pointer. As a secondary criterion, make sure stores coming
12027 // later in the code come first in the list. This is important for
12028 // the non-UseAA case, because we're merging stores into the FINAL
12029 // store along a chain which potentially contains aliasing stores.
12030 // Thus, if there are multiple stores to the same address, the last
12031 // one can be considered for merging but not the others.
1203312032 std::sort(StoreNodes.begin(), StoreNodes.end(),
1203412033 [](MemOpLink LHS, MemOpLink RHS) {
12035 return LHS.OffsetFromBase < RHS.OffsetFromBase;
12036 });
12034 return LHS.OffsetFromBase < RHS.OffsetFromBase ||
12035 (LHS.OffsetFromBase == RHS.OffsetFromBase &&
12036 LHS.SequenceNum < RHS.SequenceNum);
12037 });
1203712038
1203812039 // Scan the memory operations on the chain and find the first non-consecutive
1203912040 // store memory address.
12040 unsigned NumConsecutiveStores = 0;
12041 unsigned LastConsecutiveStore = 0;
1204112042 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
12042
12043 // Check that the addresses are consecutive starting from the second
12044 // element in the list of stores.
12045 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
12046 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12047 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12043 for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
12044
12045 // Check that the addresses are consecutive starting from the second
12046 // element in the list of stores.
12047 if (i > 0) {
12048 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12049 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12050 break;
12051 }
12052
12053 // Check if this store interferes with any of the loads that we found.
12054 // If we find a load that aliases with this store, stop the sequence.
12055 if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
12056 return isAlias(Ldn, StoreNodes[i].MemNode);
12057 }))
1204812058 break;
12049 NumConsecutiveStores = i + 1;
12050 }
12051
12052 if (NumConsecutiveStores < 2)
12053 return false;
12059
12060 // Mark this node as useful.
12061 LastConsecutiveStore = i;
12062 }
1205412063
1205512064 // The node with the lowest store address.
12065 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12066 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12067 unsigned FirstStoreAlign = FirstInChain->getAlignment();
1205612068 LLVMContext &Context = *DAG.getContext();
1205712069 const DataLayout &DL = DAG.getDataLayout();
1205812070
1205912071 // Store the constants into memory as one consecutive store.
1206012072 if (IsConstantSrc) {
12061 bool RV = false;
12062 while (NumConsecutiveStores > 1) {
12063 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12064 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12065 unsigned FirstStoreAlign = FirstInChain->getAlignment();
12066 unsigned LastLegalType = 0;
12067 unsigned LastLegalVectorType = 0;
12068 bool NonZero = false;
12069 bool LastLegalVectorNonZero = false;
12070 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12071 StoreSDNode *ST = cast(StoreNodes[i].MemNode);
12072 SDValue StoredVal = ST->getValue();
12073
12074 if (ConstantSDNode *C = dyn_cast(StoredVal)) {
12075 NonZero |= !C->isNullValue();
12076 } else if (ConstantFPSDNode *C =
12077 dyn_cast(StoredVal)) {
12078 NonZero |= !C->getConstantFPValue()->isNullValue();
12079 } else {
12080 // Non-constant.
12081 break;
12082 }
12083
12084 // Find a legal type for the constant store.
12085 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12086 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12087 bool IsFast = false;
12088 if (TLI.isTypeLegal(StoreTy) &&
12089 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12090 FirstStoreAlign, &IsFast) &&
12073 unsigned LastLegalType = 0;
12074 unsigned LastLegalVectorType = 0;
12075 bool NonZero = false;
12076 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
12077 StoreSDNode *St = cast(StoreNodes[i].MemNode);
12078 SDValue StoredVal = St->getValue();
12079
12080 if (ConstantSDNode *C = dyn_cast(StoredVal)) {
12081 NonZero |= !C->isNullValue();
12082 } else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) {
12083 NonZero |= !C->getConstantFPValue()->isNullValue();
12084 } else {
12085 // Non-constant.
12086 break;
12087 }
12088
12089 // Find a legal type for the constant store.
12090 unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
12091 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12092 bool IsFast;
12093 if (TLI.isTypeLegal(StoreTy) &&
12094 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12095 FirstStoreAlign, &IsFast) && IsFast) {
12096 LastLegalType = i+1;
12097 // Or check whether a truncstore is legal.
12098 } else if (TLI.getTypeAction(Context, StoreTy) ==
12099 TargetLowering::TypePromoteInteger) {
12100 EVT LegalizedStoredValueTy =
12101 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12102 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12103 TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12104 FirstStoreAS, FirstStoreAlign, &IsFast) &&
1209112105 IsFast) {
1209212106 LastLegalType = i + 1;
12093 // Or check whether a truncstore is legal.
12094 } else if (TLI.getTypeAction(Context, StoreTy) ==
12095 TargetLowering::TypePromoteInteger) {
12096 EVT LegalizedStoredValueTy =
12097 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12098 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12099 TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12100 FirstStoreAS, FirstStoreAlign, &IsFast) &&
12101 IsFast) {
12102 LastLegalType = i + 1;
12103 }
12104 }
12105
12106 // We only use vectors if the constant is known to be zero or the target
12107 // allows it and the function is not marked with the noimplicitfloat
12108 // attribute.
12109 if ((!NonZero ||
12110 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
12111 !NoVectors) {
12112 // Find a legal type for the vector store.
12113 EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
12114 if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) &&
12115 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12116 FirstStoreAlign, &IsFast) &&
12117 IsFast)
12118 LastLegalVectorType = i + 1;
12119 LastLegalVectorNonZero = NonZero;
1212012107 }
1212112108 }
1212212109
12123 // Check if we found a legal integer type that creates a meaningful merge.
12124 if (LastLegalType < 2 && LastLegalVectorType < 2)
12125 break;
12126
12127 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
12128 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
12129
12130 bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
12131 true, UseVector);
12132 if (!Merged)
12133 break;
12134 // Remove merged stores for next iteration.
12135 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
12136 RV = true;
12137 NumConsecutiveStores -= NumElem;
12138 }
12139 return RV;
12110 // We only use vectors if the constant is known to be zero or the target
12111 // allows it and the function is not marked with the noimplicitfloat
12112 // attribute.
12113 if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
12114 FirstStoreAS)) &&
12115 !NoVectors) {
12116 // Find a legal type for the vector store.
12117 EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
12118 if (TLI.isTypeLegal(Ty) &&
12119 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12120 FirstStoreAlign, &IsFast) && IsFast)
12121 LastLegalVectorType = i + 1;
12122 }
12123 }
12124
12125 // Check if we found a legal integer type to store.
12126 if (LastLegalType == 0 && LastLegalVectorType == 0)
12127 return false;
12128
12129 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
12130 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
12131
12132 return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
12133 true, UseVector);
1214012134 }
1214112135
1214212136 // When extracting multiple vector elements, try to store them
1214312137 // in one vector store rather than a sequence of scalar stores.
1214412138 if (IsExtractVecSrc) {
12145 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12146 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12147 unsigned FirstStoreAlign = FirstInChain->getAlignment();
1214812139 unsigned NumStoresToMerge = 0;
1214912140 bool IsVec = MemVT.isVector();
12150 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12141 for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
1215112142 StoreSDNode *St = cast(StoreNodes[i].MemNode);
1215212143 unsigned StoreValOpcode = St->getValue().getOpcode();
1215312144 // This restriction could be loosened.
1218712178 // Find acceptable loads. Loads need to have the same chain (token factor),
1218812179 // must not be zext, volatile, indexed, and they must be consecutive.
1218912180 BaseIndexOffset LdBasePtr;
12190 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12181 for (unsigned i=0; i; ++i) {
1219112182 StoreSDNode *St = cast(StoreNodes[i].MemNode);
1219212183 LoadSDNode *Ld = dyn_cast(St->getValue());
1219312184 if (!Ld) break;
1222012211 }
1222112212
1222212213 // We found a potential memory operand to merge.
12223 LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
12214 LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
1222412215 }
1222512216
1222612217 if (LoadNodes.size() < 2)
1223212223 if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
1223312224 St->getAlignment() >= RequiredAlignment)
1223412225 return false;
12235 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12236 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12237 unsigned FirstStoreAlign = FirstInChain->getAlignment();
12226
1223812227 LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode);
1223912228 unsigned FirstLoadAS = FirstLoad->getAddressSpace();
1224012229 unsigned FirstLoadAlign = FirstLoad->getAlignment();
1230312292
1230412293 // We add +1 here because the LastXXX variables refer to location while
1230512294 // the NumElem refers to array/index size.
12306 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
12295 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
1230712296 NumElem = std::min(LastLegalType, NumElem);
1230812297
1230912298 if (NumElem < 2)
1231012299 return false;
1231112300
12312 // Collect the chains from all merged stores. Because the common case
12313 // all chains are the same, check if we match the first Chain.
12301 // Collect the chains from all merged stores.
1231412302 SmallVector MergeStoreChains;
1231512303 MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
12316 for (unsigned i = 1; i < NumElem; ++i)
12317 if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain())
12318 MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
12304
12305 // The latest Node in the DAG.
12306 unsigned LatestNodeUsed = 0;
12307 for (unsigned i=1; i
12308 // Find a chain for the new wide-store operand. Notice that some
12309 // of the store nodes that we found may not be selected for inclusion
12310 // in the wide store. The chain we use needs to be the chain of the
12311 // latest store node which is *used* and replaced by the wide store.
12312 if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
12313 LatestNodeUsed = i;
12314
12315 MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
12316 }
12317
12318 LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
1231912319
1232012320 // Find if it is better to use vectors or integers to load and store
1232112321 // to memory.
1233912339 SDValue NewStoreChain =
1234012340 DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
1234112341
12342 AddToWorklist(NewStoreChain.getNode());
12343
1234412342 SDValue NewStore =
1234512343 DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
1234612344 FirstInChain->getPointerInfo(), FirstStoreAlign);
1235212350 SDValue(NewLoad.getNode(), 1));
1235312351 }
1235412352
12355 // Replace the all stores with the new store.
12356 for (unsigned i = 0; i < NumElem; ++i)
12357 CombineTo(StoreNodes[i].MemNode, NewStore);
12353 if (UseAA) {
12354 // Replace the all stores with the new store.
12355 for (unsigned i = 0; i < NumElem; ++i)
12356 CombineTo(StoreNodes[i].MemNode, NewStore);
12357 } else {
12358 // Replace the last store with the new store.
12359 CombineTo(LatestOp, NewStore);
12360 // Erase all other stores.
12361 for (unsigned i = 0; i < NumElem; ++i) {
12362 // Remove all Store nodes.
12363 if (StoreNodes[i].MemNode == LatestOp)
12364 continue;
12365 StoreSDNode *St = cast(StoreNodes[i].MemNode);
12366 DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
12367 deleteAndRecombine(St);
12368 }
12369 }
12370
12371 StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
1235812372 return true;
1235912373 }
1236012374
1251112525 if (SDValue NewST = TransformFPLoadStorePair(N))
1251212526 return NewST;
1251312527
12514 if (ST->isUnindexed()) {
12528 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
12529 : DAG.getSubtarget().useAA();
12530 #ifndef NDEBUG
12531 if (CombinerAAOnlyFunc.getNumOccurrences() &&
12532 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
12533 UseAA = false;
12534 #endif
12535 if (UseAA && ST->isUnindexed()) {
12536 // FIXME: We should do this even without AA enabled. AA will just allow
12537 // FindBetterChain to work in more situations. The problem with this is that
12538 // any combine that expects memory operations to be on consecutive chains
12539 // first needs to be updated to look for users of the same chain.
12540
1251512541 // Walk up chain skipping non-aliasing memory nodes, on this store and any
1251612542 // adjacent stores.
1251712543 if (findBetterNeighborChains(ST)) {
1254512571 if (SimplifyDemandedBits(
1254612572 Value,
1254712573 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12548 ST->getMemoryVT().getScalarSizeInBits()))) {
12549 // Re-visit the store if anything changed; SimplifyDemandedBits
12550 // will add Value's node back to the worklist if necessary, but
12551 // we also need to re-visit the Store node itself.
12552 AddToWorklist(N);
12574 ST->getMemoryVT().getScalarSizeInBits())))
1255312575 return SDValue(N, 0);
12554 }
1255512576 }
1255612577
1255712578 // If this is a load followed by a store to the same location, then the store
1259512616 // There can be multiple store sequences on the same chain.
1259612617 // Keep trying to merge store sequences until we are unable to do so
1259712618 // or until we merge the last store on the chain.
12598 bool Changed = MergeConsecutiveStores(ST);
12619 SmallVector StoreNodes;
12620 bool Changed = MergeConsecutiveStores(ST, StoreNodes);
1259912621 if (!Changed) break;
12600 // Return N as merge only uses CombineTo and no worklist clean
12601 // up is necessary.
12602 if (N->getOpcode() == ISD::DELETED_NODE || !isa(N))
12622
12623 if (any_of(StoreNodes,
12624 [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) {
12625 // ST has been merged and no longer exists.
1260312626 return SDValue(N, 0);
12627 }
1260412628 }
1260512629 }
1260612630
1260912633 // Make sure to do this only after attempting to merge stores in order to
1261012634 // avoid changing the types of some subset of stores due to visit order,
1261112635 // preventing their merging.
12612 if (isa(ST->getValue())) {
12636 if (isa(Value)) {
1261312637 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
1261412638 return NewSt;
1261512639 }
1354313567 // A vector built entirely of undefs is undef.
1354413568 if (ISD::allOperandsUndef(N))
1354513569 return DAG.getUNDEF(VT);
13546
13547 // Check if we can express BUILD VECTOR via subvector extract.
13548 if (!LegalTypes && (N->getNumOperands() > 1)) {
13549 SDValue Op0 = N->getOperand(0);
13550 auto checkElem = [&](SDValue Op) -> uint64_t {
13551 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
13552 (Op0.getOperand(0) == Op.getOperand(0)))
13553 if (auto CNode = dyn_cast(Op.getOperand(1)))
13554 return CNode->getZExtValue();
13555 return -1;
13556 };
13557
13558 int Offset = checkElem(Op0);
13559 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
13560 if (Offset + i != checkElem(N->getOperand(i))) {
13561 Offset = -1;
13562 break;
13563 }
13564 }
13565
13566 if ((Offset == 0) &&
13567 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
13568 return Op0.getOperand(0);
13569 if ((Offset != -1) &&
13570 ((Offset % N->getValueType(0).getVectorNumElements()) ==
13571 0)) // IDX must be multiple of output size.
13572 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
13573 Op0.getOperand(0), Op0.getOperand(1));
13574 }
1357513570
1357613571 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
1357713572 return V;
1573515730 ++Depth;
1573615731 break;
1573715732
15738 case ISD::CopyFromReg:
15739 // Forward past CopyFromReg.
15740 Chains.push_back(Chain.getOperand(0));
15741 ++Depth;
15742 break;
15743
1574415733 default:
1574515734 // For all other instructions we will just have to take what we can get.
1574615735 Aliases.push_back(Chain);
1576915758 return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
1577015759 }
1577115760
15772 // This function tries to collect a bunch of potentially interesting
15773 // nodes to improve the chains of, all at once. This might seem
15774 // redundant, as this function gets called when visiting every store
15775 // node, so why not let the work be done on each store as it's visited?
15776 //
15777 // I believe this is mainly important because MergeConsecutiveStores
15778 // is unable to deal with merging stores of different sizes, so unless
15779 // we improve the chains of all the potential candidates up-front
15780 // before running MergeConsecutiveStores, it might only see some of
15781 // the nodes that will eventually be candidates, and then not be able
15782 // to go from a partially-merged state to the desired final
15783 // fully-merged state.
1578415761 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
1578515762 // This holds the base pointer, index, and the offset in bytes from the base
1578615763 // pointer.
1581615793 if (!Ptr.equalBaseIndex(BasePtr))
1581715794 break;
1581815795
15819 // Walk up the chain to find the next store node, ignoring any
15820 // intermediate loads. Any other kind of node will halt the loop.
15796 // Find the next memory operand in the chain. If the next operand in the
15797 // chain is a store then move up and continue the scan with the next
15798 // memory operand. If the next operand is a load save it and use alias
15799 // information to check if it interferes with anything.
1582115800 SDNode *NextInChain = Index->getChain().getNode();
1582215801 while (true) {
1582315802 if (StoreSDNode *STn = dyn_cast(NextInChain)) {
1583615815 Index = nullptr;
1583715816 break;
1583815817 }
15839 } // end while
15840 }
15841
15842 // At this point, ChainedStores lists all of the Store nodes
15843 // reachable by iterating up through chain nodes matching the above
15844 // conditions. For each such store identified, try to find an
15845 // earlier chain to attach the store to which won't violate the
15846 // required ordering.
15818 }
15819 }
15820
1584715821 bool MadeChangeToSt = false;
1584815822 SmallVector, 8> BetterChains;
1584915823
850850 MinFunctionAlignment = 0;
851851 PrefFunctionAlignment = 0;
852852 PrefLoopAlignment = 0;
853 GatherAllAliasesMaxDepth = 18;
853 GatherAllAliasesMaxDepth = 6;
854854 MinStackArgumentAlignment = 1;
855855 // TODO: the default will be switched to 0 in the next commit, along
856856 // with the Target-specific changes necessary.
92649264 return SDValue();
92659265 }
92669266
9267 /// This function handles the log2-shuffle pattern produced by the
9267 /// This function handles the log2-shuffle pattern produced by the
92689268 /// LoopVectorizer for the across vector reduction. It consists of
92699269 /// log2(NumVectorElements) steps and, in each step, 2^(s) elements
92709270 /// are reduced, where s is an induction variable from 0 to
448448 setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
449449
450450 PredictableSelectIsExpensive = false;
451
452 // We want to find all load dependencies for long chains of stores to enable
453 // merging into very wide vectors. The problem is with vectors with > 4
454 // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
455 // vectors are a legal type, even though we have to split the loads
456 // usually. When we can more precisely specify load legality per address
457 // space, we should be able to make FindBetterChain/MergeConsecutiveStores
458 // smarter so that they can figure out what to do in 2 iterations without all
459 // N > 4 stores on the same chain.
460 GatherAllAliasesMaxDepth = 16;
451461
452462 // FIXME: Need to really handle these.
453463 MaxStoresPerMemcpy = 4096;
498498
499499 bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
500500 unsigned &Cost) const override;
501
502 bool canMergeStoresTo(EVT MemVT) const override {
503 // Do not merge to larger than i32.
504 return (MemVT.getSizeInBits() <= 32);
505 }
506501
507502 bool isCheapToSpeculateCttz() const override;
508503 bool isCheapToSpeculateCtlz() const override;
5858 }
5959
6060 ; [2 x float] should not be promoted to double by the Darwin varargs handling,
61 ; but should go in an 8-byte aligned slot and can be merged as integer stores.
61 ; but should go in an 8-byte aligned slot.
6262 define void @test_varargs_stackalign() {
6363 ; CHECK-LABEL: test_varargs_stackalign:
64 ; CHECK-DARWINPCS: str {{x[0-9]+}}, [sp, #16]
64 ; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
6565
6666 call void(...) @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
6767 ret void
204204 define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind {
205205 entry:
206206 ; CHECK-LABEL: test8
207 ; CHECK: str w8, [sp]
207 ; CHECK: strb {{w[0-9]+}}, [sp, #3]
208 ; CHECK: strb wzr, [sp, #2]
209 ; CHECK: strb {{w[0-9]+}}, [sp, #1]
210 ; CHECK: strb wzr, [sp]
208211 ; CHECK: bl
209212 ; FAST-LABEL: test8
210213 ; FAST: strb {{w[0-9]+}}, [sp]
1212 entry:
1313 ; CHECK-LABEL: t2:
1414 ; CHECK: strh wzr, [sp, #32]
15 ; CHECK: stp xzr, xzr, [sp, #8]
16 ; CHECK: str xzr, [sp, #24]
15 ; CHECK: stp xzr, xzr, [sp, #16]
16 ; CHECK: str xzr, [sp, #8]
1717 %buf = alloca [26 x i8], align 1
1818 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
1919 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
9898 ; __stack field should point just past them.
9999 define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
100100 ; CHECK-LABEL: test_offsetstack:
101 ; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]!
101 ; CHECK: sub sp, sp, #80
102102 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
103103 ; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
104104 ; CHECK: str [[STACK_TOP]], [x[[VAR]]]
33 @g0 = external global <3 x float>, align 16
44 @g1 = external global <3 x float>, align 4
55
6 ; CHECK: ldr q[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]], :lo12:g0
6 ; CHECK: ldr s[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]]{{\]}}, #4
7 ; CHECK: ld1{{\.?s?}} { v[[R0]]{{\.?s?}} }[1], {{\[}}[[R1]]{{\]}}
78 ; CHECK: str d[[R0]]
89
910 define void @blam() {
None ; RUN: llc < %s | FileCheck %s
0 ; RUN: llc --combiner-alias-analysis=false < %s | FileCheck %s
1 ; RUN: llc --combiner-alias-analysis=true < %s | FileCheck %s
12
23 ; This test checks that we do not merge stores together which have
34 ; dependencies through their non-chain operands (e.g. one store is the
None ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
1 ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP
0 ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s
21
3 ; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each.
2 ; CHECK: test01.cl:2:{{[0-9]+}}
3 ; CHECK-NEXT: s_nop 0
44
5 ; Check that each line appears at least once
6 ; CHECK-DAG: test01.cl:2:3
7 ; CHECK-DAG: test01.cl:3:3
8 ; CHECK-DAG: test01.cl:4:3
5 ; CHECK: test01.cl:3:{{[0-9]+}}
6 ; CHECK-NEXT: s_nop 0
97
10
11 ; Check that each of each of the lines consists of the line output, followed by "s_nop 0"
12 ; CHECKNOP: test01.cl:{{[234]}}:3
13 ; CHECKNOP-NEXT: s_nop 0
14 ; CHECKNOP: test01.cl:{{[234]}}:3
15 ; CHECKNOP-NEXT: s_nop 0
16 ; CHECKNOP: test01.cl:{{[234]}}:3
17 ; CHECKNOP-NEXT: s_nop 0
8 ; CHECK: test01.cl:4:{{[0-9]+}}
9 ; CHECK-NEXT: s_nop 0
1810
1911 ; CHECK: test01.cl:5:{{[0-9]+}}
2012 ; CHECK-NEXT: s_nop 0
2820 call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19
2921 %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20
3022 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
31 store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !20
23 store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
3224 %1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22
3325 %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
3426 store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23
256256
257257 ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
258258 ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
259 ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
260 ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
259 ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
260
261 ; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
261262
262263 ; GCN: buffer_load_ubyte
263264 ; GCN: buffer_load_ubyte
None ; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
0 ; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
2
3 ; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
4 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
25
36 ; This test is mostly to test DAG store merging, so disable the vectorizer.
47 ; Run with devices with different unaligned load restrictions.
146149 }
147150
148151 ; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
149 ; GCN-AA: buffer_store_dwordx4 v
152 ; GCN-NOAA: buffer_store_dwordx4 v
153
154 ; GCN-AA: buffer_store_dwordx2
155 ; GCN-AA: buffer_store_dword v
156 ; GCN-AA: buffer_store_dword v
157
150158 ; GCN: s_endpgm
151159 define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
152160 %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
465473 ret void
466474 }
467475
476 ; This works once AA is enabled on the subtarget
468477 ; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
469478 ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
470 ; GCN: buffer_store_dwordx4 [[LOAD]]
479
480 ; GCN-NOAA: buffer_store_dword v
481 ; GCN-NOAA: buffer_store_dword v
482 ; GCN-NOAA: buffer_store_dword v
483 ; GCN-NOAA: buffer_store_dword v
484
485 ; GCN-AA: buffer_store_dwordx4 [[LOAD]]
486
471487 ; GCN: s_endpgm
472488 define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
473489 %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
3131 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
3232 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}
3333
34 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
35 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
36 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
37 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
34 ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
35 ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
36 ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
37 ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
3838 define void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
3939 entry:
4040 %tid = call i32 @llvm.amdgcn.workitem.id.x()
129129 ; HSA-ELT8: private_element_size = 2
130130 ; HSA-ELT4: private_element_size = 1
131131
132 ; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9{{$}}
133 ; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9 offset:8
132 ; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9{{$}}
133 ; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
134134
135135 ; HSA-ELTGE8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
136136
156156
157157 ; FUNC-LABEL: @reorder_local_offsets
158158 ; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:100 offset1:102
159 ; CI-DAG: ds_write2_b32 {{v[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:3 offset1:100
160 ; CI-DAG: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408
159 ; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:100
160 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
161 ; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408
161162 ; CI: buffer_store_dword
162163 ; CI: s_endpgm
163164 define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
179180 }
180181
181182 ; FUNC-LABEL: @reorder_global_offsets
182 ; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
183 ; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
184 ; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
185 ; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
186 ; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
187 ; CI: buffer_store_dword
183 ; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
184 ; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
185 ; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
186 ; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
187 ; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
188 ; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
188189 ; CI: s_endpgm
189190 define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
190191 %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3
1111 entry:
1212 ; CHECK: sub sp, sp, #12
1313 ; CHECK: sub sp, sp, #4
14 ; CHECK: add r0, sp, #4
15 ; CHECK: stm sp, {r0, r1, r2, r3}
14 ; CHECK: stmib sp, {r1, r2, r3}
1615 %g = alloca i8*
1716 %g1 = bitcast i8** %g to i8*
1817 call void @llvm.va_start(i8* %g1)
None ; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s
0 ; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=NO-REALIGN
1 ; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=REALIGN
12
23 ; rdar://12713765
34 ; When realign-stack is set to false, make sure we are not creating stack
67
78 define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
89 entry:
9 ; CHECK-LABEL: test1
10 ; CHECK: ldr r[[R1:[0-9]+]], [pc, r1]
11 ; CHECK: add r[[R2:[0-9]+]], r1, #48
12 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
13 ; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
14 ; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
15 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
16 ; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
17 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
18 ; CHECK: mov r[[R1:[0-9]+]], sp
19 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
20 ; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32
21 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
22 ; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
23 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
24 ; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
25 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
26 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
27 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
28 ; CHECK: add r[[R1:[0-9]+]], r0, #48
29 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
30 ; CHECK: add r[[R1:[0-9]+]], r0, #32
31 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
32 ; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]!
33 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
10 ; NO-REALIGN-LABEL: test1
11 ; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
12 ; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
13 ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
14 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
15 ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
16 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
17 ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
18
19 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
20 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
21 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
22 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
23 ; NO-REALIGN: mov r[[R3:[0-9]+]], r[[R1]]
24 ; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]!
25 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]
26
27 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48
28 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
29 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32
30 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
31 ; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
32 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
3433 %retval = alloca <16 x float>, align 16
3534 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
3635 store <16 x float> %0, <16 x float>* %retval
4140
4241 define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
4342 entry:
44 ; CHECK: ldr r[[R1:[0-9]+]], [pc, r1]
45 ; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48
46 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
47 ; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
48 ; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
49 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
50 ; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
51 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
52 ; CHECK: mov r[[R1:[0-9]+]], sp
53 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
54 ; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32
55 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
56 ; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
57 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
58 ; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
59 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
60 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
61 ; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
62 ; CHECK: add r[[R1:[0-9]+]], r0, #48
63 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
64 ; CHECK: add r[[R1:[0-9]+]], r0, #32
65 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
66 ; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]!
67 ; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
43 ; REALIGN-LABEL: test2
44 ; REALIGN: bfc sp, #0, #6
45 ; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
46 ; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
47 ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
48 ; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
49 ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
50 ; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
51 ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
6852
6953
70 %retval = alloca <16 x float>, align 16
54 ; REALIGN: orr r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
55 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
56 ; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #32
57 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
58 ; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #16
59 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
60 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
61
62 ; REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48
63 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
64 ; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32
65 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
66 ; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
67 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
68 %retval = alloca <16 x float>, align 16
7169 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
7270 store <16 x float> %0, <16 x float>* %retval
7371 %1 = load <16 x float>, <16 x float>* %retval
1515 ; an LDMIA was created with both a FrameIndex and an offset, which
1616 ; is not allowed.
1717
18 ; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
19 ; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
18 ; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
19 ; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
2020
21 ; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
22 ; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
21 ; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
22 ; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
2323
2424 ; We also want to ensure the register scavenger is working (i.e. an
2525 ; offset from sp can be generated), so we need two spills.
26 ; CHECK-WITHOUT-LDRD-DAG: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
27 ; CHECK-WITHOUT-LDRD-DAG: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
28 ; CHECK-WITHOUT-LDRD-DAG: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
26 ; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
27 ; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
28 ; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
2929
3030 ; In principle LLVM may have to recalculate the offset. At the moment
3131 ; it reuses the original though.
32 ; CHECK-WITHOUT-LDRD-DAG: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
33 ; CHECK-WITHOUT-LDRD-DAG: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
32 ; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
33 ; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
3434
3535 store volatile i64 %val1, i64* %addr
3636 store volatile i64 %val2, i64* %addr
88 ; CHECK-LABEL: t:
99 ; CHECK: vpop {d8}
1010 ; CHECK-NOT: vpopne
11 ; CHECK: pop {r7, pc}
12 ; CHECK: vpop {d8}
1113 ; CHECK: pop {r7, pc}
1214 br i1 undef, label %if.else, label %if.then
1315
55 ; CHECK: movs [[VAL:r[0-9]+]], #42
66 ; CHECK: movt r[[BASE1]], #15
77
8 ; CHECK-DAG: str [[VAL]], [r[[BASE1]]]
9 ; CHECK-DAG: str [[VAL]], [r[[BASE1]], #24]
10 ; CHECK-DAG: str.w [[VAL]], [r[[BASE1]], #42]
8 ; CHECK: str [[VAL]], [r[[BASE1]]]
9 ; CHECK: str [[VAL]], [r[[BASE1]], #24]
10 ; CHECK: str.w [[VAL]], [r[[BASE1]], #42]
1111
1212 ; CHECK: movw r[[BASE2:[0-9]+]], #20394
1313 ; CHECK: movt r[[BASE2]], #18
1212
1313 ; Function Attrs: nounwind uwtable
1414 define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" {
15 ; CHECK: r2 = r10
16 ; CHECK: r2 += -2
17 ; CHECK: r1 = 0
18 ; CHECK: *(u16 *)(r2 + 6) = r1
19 ; CHECK: *(u16 *)(r2 + 4) = r1
20 ; CHECK: *(u16 *)(r2 + 2) = r1
21 ; CHECK: r2 = 6
22 ; CHECK: *(u8 *)(r10 - 7) = r2
23 ; CHECK: r2 = 5
24 ; CHECK: *(u8 *)(r10 - 8) = r2
25 ; CHECK: r2 = 7
26 ; CHECK: *(u8 *)(r10 - 6) = r2
27 ; CHECK: r2 = 8
28 ; CHECK: *(u8 *)(r10 - 5) = r2
29 ; CHECK: r2 = 9
30 ; CHECK: *(u8 *)(r10 - 4) = r2
31 ; CHECK: r2 = 10
32 ; CHECK: *(u8 *)(r10 - 3) = r2
33 ; CHECK: *(u16 *)(r10 + 24) = r1
34 ; CHECK: *(u16 *)(r10 + 22) = r1
35 ; CHECK: *(u16 *)(r10 + 20) = r1
36 ; CHECK: *(u16 *)(r10 + 18) = r1
37 ; CHECK: *(u16 *)(r10 + 16) = r1
38 ; CHECK: *(u16 *)(r10 + 14) = r1
39 ; CHECK: *(u16 *)(r10 + 12) = r1
40 ; CHECK: *(u16 *)(r10 + 10) = r1
41 ; CHECK: *(u16 *)(r10 + 8) = r1
42 ; CHECK: *(u16 *)(r10 + 6) = r1
43 ; CHECK: *(u16 *)(r10 - 2) = r1
44 ; CHECK: *(u16 *)(r10 + 26) = r1
45 ; CHECK: r2 = r10
46 ; CHECK: r2 += -8
47 ; CHECK: r1 = ll
48 ; CHECK: call bpf_map_lookup_elem
49 ; CHECK: exit
5015 %key = alloca %struct.routing_key_2, align 1
5116 %1 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 0
17 ; CHECK: r1 = 5
18 ; CHECK: *(u8 *)(r10 - 8) = r1
5219 store i8 5, i8* %1, align 1
5320 %2 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 1
21 ; CHECK: r1 = 6
22 ; CHECK: *(u8 *)(r10 - 7) = r1
5423 store i8 6, i8* %2, align 1
5524 %3 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 2
25 ; CHECK: r1 = 7
26 ; CHECK: *(u8 *)(r10 - 6) = r1
5627 store i8 7, i8* %3, align 1
5728 %4 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 3
29 ; CHECK: r1 = 8
30 ; CHECK: *(u8 *)(r10 - 5) = r1
5831 store i8 8, i8* %4, align 1
5932 %5 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 4
33 ; CHECK: r1 = 9
34 ; CHECK: *(u8 *)(r10 - 4) = r1
6035 store i8 9, i8* %5, align 1
6136 %6 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 5
37 ; CHECK: r1 = 10
38 ; CHECK: *(u8 *)(r10 - 3) = r1
6239 store i8 10, i8* %6, align 1
6340 %7 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 1, i32 0, i64 0
41 ; CHECK: r1 = r10
42 ; CHECK: r1 += -2
43 ; CHECK: r2 = 0
44 ; CHECK: *(u16 *)(r1 + 6) = r2
45 ; CHECK: *(u16 *)(r1 + 4) = r2
46 ; CHECK: *(u16 *)(r1 + 2) = r2
47 ; CHECK: *(u16 *)(r10 + 24) = r2
48 ; CHECK: *(u16 *)(r10 + 22) = r2
49 ; CHECK: *(u16 *)(r10 + 20) = r2
50 ; CHECK: *(u16 *)(r10 + 18) = r2
51 ; CHECK: *(u16 *)(r10 + 16) = r2
52 ; CHECK: *(u16 *)(r10 + 14) = r2
53 ; CHECK: *(u16 *)(r10 + 12) = r2
54 ; CHECK: *(u16 *)(r10 + 10) = r2
55 ; CHECK: *(u16 *)(r10 + 8) = r2
56 ; CHECK: *(u16 *)(r10 + 6) = r2
57 ; CHECK: *(u16 *)(r10 - 2) = r2
58 ; CHECK: *(u16 *)(r10 + 26) = r2
6459 call void @llvm.memset.p0i8.i64(i8* %7, i8 0, i64 30, i32 1, i1 false)
6560 %8 = call i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...) bitcast (i32 (...)* @bpf_map_lookup_elem to i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...)*)(%struct.bpf_map_def* nonnull @routing, %struct.routing_key_2* nonnull %key) #3
6661 ret i32 undef
None ; RUN: llc -march=msp430 < %s | FileCheck %s
0 ; RUN: llc -march=msp430 -combiner-alias-analysis < %s | FileCheck %s
11 target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
22 target triple = "msp430-generic-generic"
33 @foo = common global i16 0, align 2
6262 ; NEW-DAG: sd $5, 16([[R2]])
6363
6464 ; O32 has run out of argument registers and starts using the stack
65 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 16($sp)
66 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 20($sp)
65 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 24($sp)
66 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 28($sp)
6767 ; O32-DAG: sw [[R3]], 24([[R2]])
6868 ; O32-DAG: sw [[R4]], 28([[R2]])
6969 ; NEW-DAG: sd $6, 24([[R2]])
7070
71 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 24($sp)
72 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 28($sp)
71 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp)
72 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp)
7373 ; O32-DAG: sw [[R3]], 32([[R2]])
7474 ; O32-DAG: sw [[R4]], 36([[R2]])
7575 ; NEW-DAG: sd $7, 32([[R2]])
7676
77 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp)
78 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp)
77 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp)
78 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp)
7979 ; O32-DAG: sw [[R3]], 40([[R2]])
8080 ; O32-DAG: sw [[R4]], 44([[R2]])
8181 ; NEW-DAG: sd $8, 40([[R2]])
8282
83 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp)
84 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp)
83 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp)
84 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp)
8585 ; O32-DAG: sw [[R3]], 48([[R2]])
8686 ; O32-DAG: sw [[R4]], 52([[R2]])
8787 ; NEW-DAG: sd $9, 48([[R2]])
8888
89 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp)
90 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp)
89 ; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 56($sp)
90 ; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 60($sp)
9191 ; O32-DAG: sw [[R3]], 56([[R2]])
9292 ; O32-DAG: sw [[R4]], 60([[R2]])
9393 ; NEW-DAG: sd $10, 56([[R2]])
9494
9595 ; N32/N64 have run out of registers and starts using the stack too
96 ; O32-DAG: lw [[R3:\$[0-9]+]], 56($sp)
97 ; O32-DAG: lw [[R4:\$[0-9]+]], 60($sp)
96 ; O32-DAG: lw [[R3:\$[0-9]+]], 64($sp)
97 ; O32-DAG: lw [[R4:\$[0-9]+]], 68($sp)
9898 ; O32-DAG: sw [[R3]], 64([[R2]])
9999 ; O32-DAG: sw [[R4]], 68([[R2]])
100100 ; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp)
279279 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
280280 ; space.
281281 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
282 ; O32-DAG: sw [[VA]], 0([[SP]])
283
284 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
285 ; N32-DAG: sw [[VA]], 0([[SP]])
286
287 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
288 ; N64-DAG: sd [[VA]], 0([[SP]])
289
290 ; Store [[VA]]
291 ; O32-DAG: sw [[VA]], 0([[SP]])
292
293 ; ALL: teqi $zero, 1
294
295 ; Increment [[VA]] (and realign pointer for O32)
296 ; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
297 ; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
298 ; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
299 ; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
300 ; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
301 ; O32-DAG: sw [[VA2]], 0([[SP]])
302
303 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
304 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
305 ; N32-DAG: sw [[VA2]], 0([[SP]])
306
307 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
308 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
309 ; N64-DAG: sd [[VA2]], 0([[SP]])
310
311 ; Load the first argument from the variable portion and copy it to the global.
312 ; This has used the stack pointer directly rather than the [[VA]] we just set
313 ; up.
314 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
315 ; order.
316 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
317 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA_TMP2]])
318 ; O32-DAG: sw [[ARG1]], 8([[GV]])
319 ; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
320 ; O32-DAG: sw [[VA3]], 0([[SP]])
321 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]])
322 ; O32-DAG: sw [[ARG1]], 12([[GV]])
323
324 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
325 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
326 ; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
327 ; NEW-DAG: sd [[ARG1]], 8([[GV]])
328
329 ; ALL: teqi $zero, 2
330
331 ; Increment [[VA]] again.
332 ; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
333 ; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
334 ; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
335 ; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
336 ; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
337 ; O32-DAG: sw [[VA2]], 0([[SP]])
338
339 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
340 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
341 ; N32-DAG: sw [[VA3]], 0([[SP]])
342
343 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
344 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
345 ; N64-DAG: sd [[VA3]], 0([[SP]])
346
347 ; Load the second argument from the variable portion and copy it to the global.
348 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
349 ; O32-DAG: sw [[ARG2]], 16([[GV]])
350 ; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
351 ; O32-DAG: sw [[VA3]], 0([[SP]])
352 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]])
353 ; O32-DAG: sw [[ARG2]], 20([[GV]])
354
355 ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
356 ; NEW-DAG: sd [[ARG2]], 16([[GV]])
357
358 %ap = alloca i8*, align 8
359 %ap2 = bitcast i8** %ap to i8*
360 call void @llvm.va_start(i8* %ap2)
361
362 call void asm sideeffect "teqi $$zero, 1", ""()
363 %arg1 = va_arg i8** %ap, i64
364 %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
365 store volatile i64 %arg1, i64* %e1, align 8
366
367 call void asm sideeffect "teqi $$zero, 2", ""()
368 %arg2 = va_arg i8** %ap, i64
369 %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
370 store volatile i64 %arg2, i64* %e2, align 8
371
372 call void @llvm.va_end(i8* %ap2)
373
374 ret void
375 }
376
377 define void @fn_i32_dotdotdot_i16(i32 %a, ...) {
378 entry:
379 ; ALL-LABEL: fn_i32_dotdotdot_i16:
380
381 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
382 ; the argument save area (56 bytes).
383 ; O32: addiu [[SP:\$sp]], $sp, -8
384 ; N32: addiu [[SP:\$sp]], $sp, -64
385 ; N64: daddiu [[SP:\$sp]], $sp, -64
386
387 ; Save variable argument portion on the stack
388 ; O32-DAG: sw $7, 20([[SP]])
389 ; O32-DAG: sw $6, 16([[SP]])
390 ; O32-DAG: sw $5, 12([[SP]])
391
392 ; NEW-DAG: sd $11, 56([[SP]])
393 ; NEW-DAG: sd $10, 48([[SP]])
394 ; NEW-DAG: sd $9, 40([[SP]])
395 ; NEW-DAG: sd $8, 32([[SP]])
396 ; NEW-DAG: sd $7, 24([[SP]])
397 ; NEW-DAG: sd $6, 16([[SP]])
398 ; NEW-DAG: sd $5, 8([[SP]])
399
400 ; Initialize variable argument pointer.
401 ; For O32, the offset is 12 due to the 4 bytes used to store local variables,
402 ; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
403 ; fixed argument.
404 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
405 ; space.
406 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
407 ; O32-DAG: sw [[VA]], 0([[SP]])
408
409 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
410 ; N32-DAG: sw [[VA]], 0([[SP]])
411
412 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
413 ; N64-DAG: sd [[VA]], 0([[SP]])
414
415 ; Store [[VA]]
416 ; O32-DAG: sw [[VA]], 0([[SP]])
417
418 ; ALL: teqi $zero, 1
419
420 ; Increment [[VA]]
421 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
422 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
423 ; O32-DAG: sw [[VA2]], 0([[SP]])
424
425 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
426 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
427 ; N32-DAG: sw [[VA2]], 0([[SP]])
428
429 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
430 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
431 ; N64-DAG: sd [[VA2]], 0([[SP]])
432
433 ; Load the first argument from the variable portion.
434 ; This has used the stack pointer directly rather than the [[VA]] we just set
435 ; up.
436 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
437 ; order.
438 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
439
440 ; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
441 ; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
442
443 ; Copy the arg to the global
444 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
445
446 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
447
448 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
449
450 ; ALL-DAG: sh [[ARG1]], 2([[GV]])
451
452 ; ALL: teqi $zero, 2
453
454 ; Increment [[VA]] again.
455 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
456 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
457 ; O32-DAG: sw [[VA2]], 0([[SP]])
458
459 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
460 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
461 ; N32-DAG: sw [[VA3]], 0([[SP]])
462
463 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
464 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
465 ; N64-DAG: sd [[VA3]], 0([[SP]])
466
467 ; Load the second argument from the variable portion.
468 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
469
470 ; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
471 ; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
472
473 ; Copy the arg to the global
474 ; ALL-DAG: sh [[ARG2]], 4([[GV]])
475
476 %ap = alloca i8*, align 8
477 %ap2 = bitcast i8** %ap to i8*
478 call void @llvm.va_start(i8* %ap2)
479
480 call void asm sideeffect "teqi $$zero, 1", ""()
481 %arg1 = va_arg i8** %ap, i16
482 %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
483 store volatile i16 %arg1, i16* %e1, align 2
484
485 call void asm sideeffect "teqi $$zero, 2", ""()
486 %arg2 = va_arg i8** %ap, i16
487 %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
488 store volatile i16 %arg2, i16* %e2, align 2
489
490 call void @llvm.va_end(i8* %ap2)
491
492 ret void
493 }
494
495 define void @fn_i32_dotdotdot_i32(i32 %a, ...) {
496 entry:
497 ; ALL-LABEL: fn_i32_dotdotdot_i32:
498
499 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
500 ; the argument save area (56 bytes).
501 ; O32: addiu [[SP:\$sp]], $sp, -8
502 ; N32: addiu [[SP:\$sp]], $sp, -64
503 ; N64: daddiu [[SP:\$sp]], $sp, -64
504
505 ; Save variable argument portion on the stack
506 ; O32-DAG: sw $7, 20([[SP]])
507 ; O32-DAG: sw $6, 16([[SP]])
508 ; O32-DAG: sw $5, 12([[SP]])
509
510 ; NEW-DAG: sd $11, 56([[SP]])
511 ; NEW-DAG: sd $10, 48([[SP]])
512 ; NEW-DAG: sd $9, 40([[SP]])
513 ; NEW-DAG: sd $8, 32([[SP]])
514 ; NEW-DAG: sd $7, 24([[SP]])
515 ; NEW-DAG: sd $6, 16([[SP]])
516 ; NEW-DAG: sd $5, 8([[SP]])
517
518 ; Initialize variable argument pointer.
519 ; For O32, the offset is 12 due to the 4 bytes used to store local variables,
520 ; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
521 ; fixed argument.
522 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
523 ; space.
524 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
525 ; O32-DAG: sw [[VA]], 0([[SP]])
526
527 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
528 ; N32-DAG: sw [[VA]], 0([[SP]])
529
530 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
531 ; N64-DAG: sd [[VA]], 0([[SP]])
532
533 ; Store [[VA]]
534 ; O32-DAG: sw [[VA]], 0([[SP]])
535
536 ; ALL: teqi $zero, 1
537
538 ; Increment [[VA]]
539 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
540 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
541 ; O32-DAG: sw [[VA2]], 0([[SP]])
542
543 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
544 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
545 ; N32-DAG: sw [[VA2]], 0([[SP]])
546
547 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
548 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
549 ; N64-DAG: sd [[VA2]], 0([[SP]])
550
551 ; Load the first argument from the variable portion.
552 ; This has used the stack pointer directly rather than the [[VA]] we just set
553 ; up.
554 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
555 ; order.
556 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
557
558 ; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
559 ; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
560
561 ; Copy the arg to the global
562 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
563
564 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
565
566 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
567
568 ; ALL-DAG: sw [[ARG1]], 4([[GV]])
569
570 ; ALL: teqi $zero, 2
571
572 ; Increment [[VA]] again.
573 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
574 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
575 ; O32-DAG: sw [[VA2]], 0([[SP]])
576
577 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
578 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
579 ; N32-DAG: sw [[VA3]], 0([[SP]])
580
581 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
582 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
583 ; N64-DAG: sd [[VA3]], 0([[SP]])
584
585 ; Load the second argument from the variable portion.
586 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
587
588 ; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
589 ; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
590
591 ; Copy the arg to the global
592 ; ALL-DAG: sw [[ARG2]], 8([[GV]])
593
594 %ap = alloca i8*, align 8
595 %ap2 = bitcast i8** %ap to i8*
596 call void @llvm.va_start(i8* %ap2)
597
598 call void asm sideeffect "teqi $$zero, 1", ""()
599 %arg1 = va_arg i8** %ap, i32
600 %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
601 store volatile i32 %arg1, i32* %e1, align 4
602
603 call void asm sideeffect "teqi $$zero, 2", ""()
604 %arg2 = va_arg i8** %ap, i32
605 %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
606 store volatile i32 %arg2, i32* %e2, align 4
607
608 call void @llvm.va_end(i8* %ap2)
609
610 ret void
611 }
612
613 define void @fn_i32_dotdotdot_i64(i32 %a, ...) {
614 entry:
615 ; ALL-LABEL: fn_i32_dotdotdot_i64:
616
617 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
618 ; the argument save area (56 bytes).
619 ; O32: addiu [[SP:\$sp]], $sp, -8
620 ; N32: addiu [[SP:\$sp]], $sp, -64
621 ; N64: daddiu [[SP:\$sp]], $sp, -64
622
623 ; Save variable argument portion on the stack
624 ; O32-DAG: sw $7, 20([[SP]])
625 ; O32-DAG: sw $6, 16([[SP]])
626 ; O32-DAG: sw $5, 12([[SP]])
627
628 ; NEW-DAG: sd $11, 56([[SP]])
629 ; NEW-DAG: sd $10, 48([[SP]])
630 ; NEW-DAG: sd $9, 40([[SP]])
631 ; NEW-DAG: sd $8, 32([[SP]])
632 ; NEW-DAG: sd $7, 24([[SP]])
633 ; NEW-DAG: sd $6, 16([[SP]])
634 ; NEW-DAG: sd $5, 8([[SP]])
635
636 ; Initialize variable argument pointer.
637 ; For O32, the offset is 12 due to the 4 bytes used to store local variables,
638 ; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
639 ; fixed argument.
640 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
641 ; space.
642 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
643 ; O32-DAG: sw [[VA]], 0([[SP]])
644
645 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
646 ; N32-DAG: sw [[VA]], 0([[SP]])
647
648 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
649 ; N64-DAG: sd [[VA]], 0([[SP]])
650
651 ; Store [[VA]]
652 ; O32-DAG: sw [[VA]], 0([[SP]])
653
654 ; ALL: teqi $zero, 1
655
656 ; Increment [[VA]] (and realign pointer for O32)
657 ; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
658 ; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
659 ; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
660 ; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
661 ; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
662 ; O32-DAG: sw [[VA2]], 0([[SP]])
663
664 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
665 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
666 ; N32-DAG: sw [[VA2]], 0([[SP]])
667
668 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
669 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
670 ; N64-DAG: sd [[VA2]], 0([[SP]])
671
672 ; Load the first argument from the variable portion and copy it to the global.
673 ; This has used the stack pointer directly rather than the [[VA]] we just set
674 ; up.
675 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
676 ; order.
677 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
678 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA_TMP2]])
679 ; O32-DAG: sw [[ARG1]], 8([[GV]])
680 ; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
681 ; O32-DAG: sw [[VA3]], 0([[SP]])
682 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]])
683 ; O32-DAG: sw [[ARG1]], 12([[GV]])
684
685 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
686 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
687 ; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
688 ; NEW-DAG: sd [[ARG1]], 8([[GV]])
689
690 ; ALL: teqi $zero, 2
691
692 ; Increment [[VA]] again.
693 ; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
694 ; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
695 ; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
696 ; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
697 ; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
698 ; O32-DAG: sw [[VA2]], 0([[SP]])
699
700 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
701 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
702 ; N32-DAG: sw [[VA3]], 0([[SP]])
703
704 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
705 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
706 ; N64-DAG: sd [[VA3]], 0([[SP]])
707
708 ; Load the second argument from the variable portion and copy it to the global.
709 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
710 ; O32-DAG: sw [[ARG2]], 16([[GV]])
711 ; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
712 ; O32-DAG: sw [[VA2]], 0([[SP]])
713 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]])
714 ; O32-DAG: sw [[ARG2]], 20([[GV]])
715
716 ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
717 ; NEW-DAG: sd [[ARG2]], 16([[GV]])
718
719 %ap = alloca i8*, align 8
720 %ap2 = bitcast i8** %ap to i8*
721 call void @llvm.va_start(i8* %ap2)
722
723 call void asm sideeffect "teqi $$zero, 1", ""()
724 %arg1 = va_arg i8** %ap, i64
725 %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
726 store volatile i64 %arg1, i64* %e1, align 8
727
728 call void asm sideeffect "teqi $$zero, 2", ""()
729 %arg2 = va_arg i8** %ap, i64
730 %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
731 store volatile i64 %arg2, i64* %e2, align 8
732
733 call void @llvm.va_end(i8* %ap2)
734
735 ret void
736 }
737
738 define void @fn_i64_dotdotdot_i16(i64 %a, ...) {
739 entry:
740 ; ALL-LABEL: fn_i64_dotdotdot_i16:
741
742 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
743 ; the argument save area (56 bytes).
744 ; O32: addiu [[SP:\$sp]], $sp, -8
745 ; N32: addiu [[SP:\$sp]], $sp, -64
746 ; N64: daddiu [[SP:\$sp]], $sp, -64
747
748 ; Save variable argument portion on the stack
749 ; O32-DAG: sw $7, 20([[SP]])
750 ; O32-DAG: sw $6, 16([[SP]])
751
752 ; NEW-DAG: sd $11, 56([[SP]])
753 ; NEW-DAG: sd $10, 48([[SP]])
754 ; NEW-DAG: sd $9, 40([[SP]])
755 ; NEW-DAG: sd $8, 32([[SP]])
756 ; NEW-DAG: sd $7, 24([[SP]])
757 ; NEW-DAG: sd $6, 16([[SP]])
758 ; NEW-DAG: sd $5, 8([[SP]])
759
760 ; Initialize variable argument pointer.
761 ; For O32, the offset is 16 due to the 4 bytes used to store local variables,
762 ; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
763 ; first fixed argument.
764 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
765 ; space.
766 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
767 ; O32-DAG: sw [[VA]], 0([[SP]])
768
769 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
770 ; N32-DAG: sw [[VA]], 0([[SP]])
771
772 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
773 ; N64-DAG: sd [[VA]], 0([[SP]])
774
775 ; Store [[VA]]
776 ; O32-DAG: sw [[VA]], 0([[SP]])
777
778 ; ALL: teqi $zero, 1
779
780 ; Increment [[VA]]
781 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
782 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
783 ; O32-DAG: sw [[VA2]], 0([[SP]])
784
785 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
786 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
787 ; N32-DAG: sw [[VA2]], 0([[SP]])
788
789 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
790 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
791 ; N64-DAG: sd [[VA2]], 0([[SP]])
792
793 ; Load the first argument from the variable portion.
794 ; This has used the stack pointer directly rather than the [[VA]] we just set
795 ; up.
796 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
797 ; order.
798 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
799
800 ; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
801 ; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
802
803 ; Copy the arg to the global
804 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
805
806 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
807
808 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
809
810 ; ALL-DAG: sh [[ARG1]], 2([[GV]])
811
812 ; ALL: teqi $zero, 2
813
814 ; Increment [[VA]] again.
815 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
816 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
817 ; O32-DAG: sw [[VA2]], 0([[SP]])
818
819 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
820 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
821 ; N32-DAG: sw [[VA3]], 0([[SP]])
822
823 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
824 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
825 ; N64-DAG: sd [[VA3]], 0([[SP]])
826
827 ; Load the second argument from the variable portion.
828 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
829
830 ; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
831 ; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
832
833 ; Copy the arg to the global
834 ; ALL-DAG: sh [[ARG2]], 4([[GV]])
835
836 %ap = alloca i8*, align 8
837 %ap2 = bitcast i8** %ap to i8*
838 call void @llvm.va_start(i8* %ap2)
839
840 call void asm sideeffect "teqi $$zero, 1", ""()
841 %arg1 = va_arg i8** %ap, i16
842 %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
843 store volatile i16 %arg1, i16* %e1, align 2
844
845 call void asm sideeffect "teqi $$zero, 2", ""()
846 %arg2 = va_arg i8** %ap, i16
847 %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
848 store volatile i16 %arg2, i16* %e2, align 2
849
850 call void @llvm.va_end(i8* %ap2)
851
852 ret void
853 }
854
855 define void @fn_i64_dotdotdot_i32(i64 %a, ...) {
856 entry:
857 ; ALL-LABEL: fn_i64_dotdotdot_i32:
858
859 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
860 ; the argument save area (56 bytes).
861 ; O32: addiu [[SP:\$sp]], $sp, -8
862 ; N32: addiu [[SP:\$sp]], $sp, -64
863 ; N64: daddiu [[SP:\$sp]], $sp, -64
864
865 ; Save variable argument portion on the stack
866 ; O32-DAG: sw $7, 20([[SP]])
867 ; O32-DAG: sw $6, 16([[SP]])
868
869 ; NEW-DAG: sd $11, 56([[SP]])
870 ; NEW-DAG: sd $10, 48([[SP]])
871 ; NEW-DAG: sd $9, 40([[SP]])
872 ; NEW-DAG: sd $8, 32([[SP]])
873 ; NEW-DAG: sd $7, 24([[SP]])
874 ; NEW-DAG: sd $6, 16([[SP]])
875 ; NEW-DAG: sd $5, 8([[SP]])
876
877 ; Initialize variable argument pointer.
878 ; For O32, the offset is 16 due to the 4 bytes used to store local variables,
879 ; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
880 ; first fixed argument.
881 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
882 ; space.
883 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
884 ; O32-DAG: sw [[VA]], 0([[SP]])
885
886 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
887 ; N32-DAG: sw [[VA]], 0([[SP]])
888
889 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
890 ; N64-DAG: sd [[VA]], 0([[SP]])
891
892 ; Store [[VA]]
893 ; O32-DAG: sw [[VA]], 0([[SP]])
894
895 ; ALL: teqi $zero, 1
896
897 ; Increment [[VA]]
898 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
899 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
900 ; O32-DAG: sw [[VA2]], 0([[SP]])
901
902 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
903 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
904 ; N32-DAG: sw [[VA2]], 0([[SP]])
905
906 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
907 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
908 ; N64-DAG: sd [[VA2]], 0([[SP]])
909
910 ; Load the first argument from the variable portion.
911 ; This has used the stack pointer directly rather than the [[VA]] we just set
912 ; up.
913 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
914 ; order.
915 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
916
917 ; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
918 ; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
919
920 ; Copy the arg to the global
921 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
922
923 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
924
925 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
926
927 ; ALL-DAG: sw [[ARG1]], 4([[GV]])
928
929 ; ALL: teqi $zero, 2
930
931 ; Increment [[VA]] again.
932 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
933 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
934 ; O32-DAG: sw [[VA2]], 0([[SP]])
935
936 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
937 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
938 ; N32-DAG: sw [[VA3]], 0([[SP]])
939
940 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
941 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
942 ; N64-DAG: sd [[VA3]], 0([[SP]])
943
944 ; Load the second argument from the variable portion.
945 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
946
947 ; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
948 ; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
949
950 ; Copy the arg to the global
951 ; ALL-DAG: sw [[ARG2]], 8([[GV]])
952
953 %ap = alloca i8*, align 8
954 %ap2 = bitcast i8** %ap to i8*
955 call void @llvm.va_start(i8* %ap2)
956
957 call void asm sideeffect "teqi $$zero, 1", ""()
958 %arg1 = va_arg i8** %ap, i32
959 %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
960 store volatile i32 %arg1, i32* %e1, align 4
961
962 call void asm sideeffect "teqi $$zero, 2", ""()
963 %arg2 = va_arg i8** %ap, i32
964 %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
965 store volatile i32 %arg2, i32* %e2, align 4
966
967 call void @llvm.va_end(i8* %ap2)
968
969 ret void
970 }
971
972 define void @fn_i64_dotdotdot_i64(i64 %a, ...) {
973 entry:
974 ; ALL-LABEL: fn_i64_dotdotdot_i64:
975
976 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
977 ; the argument save area (56 bytes).
978 ; O32: addiu [[SP:\$sp]], $sp, -8
979 ; N32: addiu [[SP:\$sp]], $sp, -64
980 ; N64: daddiu [[SP:\$sp]], $sp, -64
981
982 ; Save variable argument portion on the stack
983 ; O32-DAG: sw $7, 20([[SP]])
984 ; O32-DAG: sw $6, 16([[SP]])
985
986 ; NEW-DAG: sd $11, 56([[SP]])
987 ; NEW-DAG: sd $10, 48([[SP]])
988 ; NEW-DAG: sd $9, 40([[SP]])
989 ; NEW-DAG: sd $8, 32([[SP]])
990 ; NEW-DAG: sd $7, 24([[SP]])
991 ; NEW-DAG: sd $6, 16([[SP]])
992 ; NEW-DAG: sd $5, 8([[SP]])
993
994 ; Initialize variable argument pointer.
995 ; For O32, the offset is 16 due to the 4 bytes used to store local variables,
996 ; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
997 ; first fixed argument.
998 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
999 ; space.
1000 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
1001282 ; O32-DAG: sw [[VA]], 0([[SP]])
1002283
1003284 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
1035316 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
1036317 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
1037318 ; O32-DAG: sw [[ARG1]], 8([[GV]])
1038 ; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
1039 ; O32-DAG: sw [[VA3]], 0([[SP]])
1040 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]])
319 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
320 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
321 ; O32-DAG: sw [[VA2]], 0([[SP]])
322 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
1041323 ; O32-DAG: sw [[ARG1]], 12([[GV]])
1042324
1043325 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
1066348 ; Load the second argument from the variable portion and copy it to the global.
1067349 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
1068350 ; O32-DAG: sw [[ARG2]], 16([[GV]])
1069 ; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4
1070 ; O32-DAG: sw [[VA3]], 0([[SP]])
1071 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]])
351 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
352 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
353 ; O32-DAG: sw [[VA2]], 0([[SP]])
354 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
1072355 ; O32-DAG: sw [[ARG2]], 20([[GV]])
1073356
1074357 ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
1093376 ret void
1094377 }
1095378
; Variadic callee with one fixed i32 argument. It reads two i16 values with
; va_arg and writes each one to @hwords (elements 1 and 2) with a volatile
; store. The two "teqi" inline-asm statements are matched by the plain ALL
; checks that sit between the groups of -DAG checks.
379 define void @fn_i32_dotdotdot_i16(i32 %a, ...) {
380 entry:
381 ; ALL-LABEL: fn_i32_dotdotdot_i16:
382
383 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
384 ; the argument save area (56 bytes).
385 ; O32: addiu [[SP:\$sp]], $sp, -8
386 ; N32: addiu [[SP:\$sp]], $sp, -64
387 ; N64: daddiu [[SP:\$sp]], $sp, -64
388
389 ; Save variable argument portion on the stack
390 ; O32-DAG: sw $7, 20([[SP]])
391 ; O32-DAG: sw $6, 16([[SP]])
392 ; O32-DAG: sw $5, 12([[SP]])
393
394 ; NEW-DAG: sd $11, 56([[SP]])
395 ; NEW-DAG: sd $10, 48([[SP]])
396 ; NEW-DAG: sd $9, 40([[SP]])
397 ; NEW-DAG: sd $8, 32([[SP]])
398 ; NEW-DAG: sd $7, 24([[SP]])
399 ; NEW-DAG: sd $6, 16([[SP]])
400 ; NEW-DAG: sd $5, 8([[SP]])
401
402 ; Initialize variable argument pointer.
403 ; For O32, the offset is 12 due to the 4 bytes used to store local variables,
404 ; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
405 ; fixed argument.
406 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
407 ; space.
408 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
409 ; O32-DAG: sw [[VA]], 0([[SP]])
410
411 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
412 ; N32-DAG: sw [[VA]], 0([[SP]])
413
414 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
415 ; N64-DAG: sd [[VA]], 0([[SP]])
416
417 ; Store [[VA]] (repeats the O32 store already checked above)
418 ; O32-DAG: sw [[VA]], 0([[SP]])
419
420 ; ALL: teqi $zero, 1
421
422 ; Increment [[VA]]
; NOTE(review): the N64 pattern in this group spells "addiu" while the second
; increment further down spells "daddiu"; confirm which spelling is intended.
423 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
424 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
425 ; O32-DAG: sw [[VA2]], 0([[SP]])
426
427 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
428 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
429 ; N32-DAG: sw [[VA2]], 0([[SP]])
430
431 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
432 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
433 ; N64-DAG: sd [[VA2]], 0([[SP]])
434
435 ; Load the first argument from the variable portion.
436 ; The checks below expect the load to go through the [[VA]] value reloaded from
437 ; the stack slot above, not through the stack pointer itself.
438 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
439 ; order.
440 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
441
442 ; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
443 ; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
444
445 ; Copy the arg to the global
446 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
447
448 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
449
450 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
451
452 ; ALL-DAG: sh [[ARG1]], 2([[GV]])
453
454 ; ALL: teqi $zero, 2
455
456 ; Increment [[VA]] again.
457 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
458 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
459 ; O32-DAG: sw [[VA2]], 0([[SP]])
460
461 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
462 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
463 ; N32-DAG: sw [[VA3]], 0([[SP]])
464
465 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
466 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
467 ; N64-DAG: sd [[VA3]], 0([[SP]])
468
469 ; Load the second argument from the variable portion.
470 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
471
472 ; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
473 ; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
474
475 ; Copy the arg to the global
476 ; ALL-DAG: sh [[ARG2]], 4([[GV]])
477
478 %ap = alloca i8*, align 8
479 %ap2 = bitcast i8** %ap to i8*
480 call void @llvm.va_start(i8* %ap2)
481
482 call void asm sideeffect "teqi $$zero, 1", ""()
483 %arg1 = va_arg i8** %ap, i16
484 %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
485 store volatile i16 %arg1, i16* %e1, align 2
486
487 call void asm sideeffect "teqi $$zero, 2", ""()
488 %arg2 = va_arg i8** %ap, i16
489 %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
490 store volatile i16 %arg2, i16* %e2, align 2
491
492 call void @llvm.va_end(i8* %ap2)
493
494 ret void
495 }
496
; Variadic callee with one fixed i32 argument. It reads two i32 values with
; va_arg and writes each one to @words (elements 1 and 2) with a volatile
; store. The two "teqi" inline-asm statements are matched by the plain ALL
; checks that sit between the groups of -DAG checks.
497 define void @fn_i32_dotdotdot_i32(i32 %a, ...) {
498 entry:
499 ; ALL-LABEL: fn_i32_dotdotdot_i32:
500
501 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
502 ; the argument save area (56 bytes).
503 ; O32: addiu [[SP:\$sp]], $sp, -8
504 ; N32: addiu [[SP:\$sp]], $sp, -64
505 ; N64: daddiu [[SP:\$sp]], $sp, -64
506
507 ; Save variable argument portion on the stack
508 ; O32-DAG: sw $7, 20([[SP]])
509 ; O32-DAG: sw $6, 16([[SP]])
510 ; O32-DAG: sw $5, 12([[SP]])
511
512 ; NEW-DAG: sd $11, 56([[SP]])
513 ; NEW-DAG: sd $10, 48([[SP]])
514 ; NEW-DAG: sd $9, 40([[SP]])
515 ; NEW-DAG: sd $8, 32([[SP]])
516 ; NEW-DAG: sd $7, 24([[SP]])
517 ; NEW-DAG: sd $6, 16([[SP]])
518 ; NEW-DAG: sd $5, 8([[SP]])
519
520 ; Initialize variable argument pointer.
521 ; For O32, the offset is 12 due to the 4 bytes used to store local variables,
522 ; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
523 ; fixed argument.
524 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
525 ; space.
526 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
527 ; O32-DAG: sw [[VA]], 0([[SP]])
528
529 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
530 ; N32-DAG: sw [[VA]], 0([[SP]])
531
532 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
533 ; N64-DAG: sd [[VA]], 0([[SP]])
534
535 ; Store [[VA]] (repeats the O32 store already checked above)
536 ; O32-DAG: sw [[VA]], 0([[SP]])
537
538 ; ALL: teqi $zero, 1
539
540 ; Increment [[VA]]
; NOTE(review): the N64 pattern in this group spells "addiu" while the second
; increment further down spells "daddiu"; confirm which spelling is intended.
541 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
542 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
543 ; O32-DAG: sw [[VA2]], 0([[SP]])
544
545 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
546 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
547 ; N32-DAG: sw [[VA2]], 0([[SP]])
548
549 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
550 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
551 ; N64-DAG: sd [[VA2]], 0([[SP]])
552
553 ; Load the first argument from the variable portion.
554 ; The checks below expect the load to go through the [[VA]] value reloaded from
555 ; the stack slot above, not through the stack pointer itself.
556 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
557 ; order.
558 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
559
560 ; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
561 ; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
562
563 ; Copy the arg to the global
564 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
565
566 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
567
568 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
569
570 ; ALL-DAG: sw [[ARG1]], 4([[GV]])
571
572 ; ALL: teqi $zero, 2
573
574 ; Increment [[VA]] again.
575 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
576 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
577 ; O32-DAG: sw [[VA2]], 0([[SP]])
578
579 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
580 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
581 ; N32-DAG: sw [[VA3]], 0([[SP]])
582
583 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
584 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
585 ; N64-DAG: sd [[VA3]], 0([[SP]])
586
587 ; Load the second argument from the variable portion.
588 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
589
590 ; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
591 ; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
592
593 ; Copy the arg to the global
594 ; ALL-DAG: sw [[ARG2]], 8([[GV]])
595
596 %ap = alloca i8*, align 8
597 %ap2 = bitcast i8** %ap to i8*
598 call void @llvm.va_start(i8* %ap2)
599
600 call void asm sideeffect "teqi $$zero, 1", ""()
601 %arg1 = va_arg i8** %ap, i32
602 %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
603 store volatile i32 %arg1, i32* %e1, align 4
604
605 call void asm sideeffect "teqi $$zero, 2", ""()
606 %arg2 = va_arg i8** %ap, i32
607 %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
608 store volatile i32 %arg2, i32* %e2, align 4
609
610 call void @llvm.va_end(i8* %ap2)
611
612 ret void
613 }
614
; Variadic callee with one fixed i32 argument. It reads two i64 values with
; va_arg and writes each one to @dwords (elements 1 and 2) with a volatile
; store. The two "teqi" inline-asm statements are matched by the plain ALL
; checks that sit between the groups of -DAG checks.
615 define void @fn_i32_dotdotdot_i64(i32 %a, ...) {
616 entry:
617 ; ALL-LABEL: fn_i32_dotdotdot_i64:
618
619 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
620 ; the argument save area (56 bytes).
621 ; O32: addiu [[SP:\$sp]], $sp, -8
622 ; N32: addiu [[SP:\$sp]], $sp, -64
623 ; N64: daddiu [[SP:\$sp]], $sp, -64
624
625 ; Save variable argument portion on the stack
626 ; O32-DAG: sw $7, 20([[SP]])
627 ; O32-DAG: sw $6, 16([[SP]])
628 ; O32-DAG: sw $5, 12([[SP]])
629
630 ; NEW-DAG: sd $11, 56([[SP]])
631 ; NEW-DAG: sd $10, 48([[SP]])
632 ; NEW-DAG: sd $9, 40([[SP]])
633 ; NEW-DAG: sd $8, 32([[SP]])
634 ; NEW-DAG: sd $7, 24([[SP]])
635 ; NEW-DAG: sd $6, 16([[SP]])
636 ; NEW-DAG: sd $5, 8([[SP]])
637
638 ; Initialize variable argument pointer.
639 ; For O32, the offset is 12 due to the 4 bytes used to store local variables,
640 ; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
641 ; fixed argument.
642 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
643 ; space.
644 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
645 ; O32-DAG: sw [[VA]], 0([[SP]])
646
647 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
648 ; N32-DAG: sw [[VA]], 0([[SP]])
649
650 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
651 ; N64-DAG: sd [[VA]], 0([[SP]])
652
653 ; Store [[VA]] (repeats the O32 store already checked above)
654 ; O32-DAG: sw [[VA]], 0([[SP]])
655
656 ; ALL: teqi $zero, 1
657
658 ; Increment [[VA]] (and realign pointer for O32)
; NOTE(review): the N64 pattern in this group spells "addiu" while the second
; increment further down spells "daddiu"; confirm which spelling is intended.
659 ; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
660 ; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
661 ; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
662 ; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
663 ; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
664 ; O32-DAG: sw [[VA2]], 0([[SP]])
665
666 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
667 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
668 ; N32-DAG: sw [[VA2]], 0([[SP]])
669
670 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
671 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
672 ; N64-DAG: sd [[VA2]], 0([[SP]])
673
674 ; Load the first argument from the variable portion and copy it to the global.
675 ; The checks below expect the loads to go through [[VA]], not the stack pointer.
676 ; The O32 sequence copies the value as two words and bumps the saved pointer by
677 ; 4 in between; the N32/N64 sequence is a single ld/sd pair at offset 0 with no
678 ; big-endian adjustment.
679 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
680 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
681 ; O32-DAG: sw [[ARG1]], 8([[GV]])
682 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
683 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
684 ; O32-DAG: sw [[VA2]], 0([[SP]])
685 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
686 ; O32-DAG: sw [[ARG1]], 12([[GV]])
687
688 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
689 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
690 ; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
691 ; NEW-DAG: sd [[ARG1]], 8([[GV]])
692
693 ; ALL: teqi $zero, 2
694
695 ; Increment [[VA]] again.
696 ; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
697 ; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
698 ; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
699 ; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
700 ; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
701 ; O32-DAG: sw [[VA2]], 0([[SP]])
702
703 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
704 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
705 ; N32-DAG: sw [[VA3]], 0([[SP]])
706
707 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
708 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
709 ; N64-DAG: sd [[VA3]], 0([[SP]])
710
711 ; Load the second argument from the variable portion and copy it to the global.
712 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
713 ; O32-DAG: sw [[ARG2]], 16([[GV]])
714 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
715 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
716 ; O32-DAG: sw [[VA2]], 0([[SP]])
717 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
718 ; O32-DAG: sw [[ARG2]], 20([[GV]])
719
720 ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
721 ; NEW-DAG: sd [[ARG2]], 16([[GV]])
722
723 %ap = alloca i8*, align 8
724 %ap2 = bitcast i8** %ap to i8*
725 call void @llvm.va_start(i8* %ap2)
726
727 call void asm sideeffect "teqi $$zero, 1", ""()
728 %arg1 = va_arg i8** %ap, i64
729 %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
730 store volatile i64 %arg1, i64* %e1, align 8
731
732 call void asm sideeffect "teqi $$zero, 2", ""()
733 %arg2 = va_arg i8** %ap, i64
734 %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
735 store volatile i64 %arg2, i64* %e2, align 8
736
737 call void @llvm.va_end(i8* %ap2)
738
739 ret void
740 }
741
; Variadic callee with one fixed i64 argument. It reads two i16 values with
; va_arg and writes each one to @hwords (elements 1 and 2) with a volatile
; store. The two "teqi" inline-asm statements are matched by the plain ALL
; checks that sit between the groups of -DAG checks.
742 define void @fn_i64_dotdotdot_i16(i64 %a, ...) {
743 entry:
744 ; ALL-LABEL: fn_i64_dotdotdot_i16:
745
746 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
747 ; the argument save area (56 bytes).
748 ; O32: addiu [[SP:\$sp]], $sp, -8
749 ; N32: addiu [[SP:\$sp]], $sp, -64
750 ; N64: daddiu [[SP:\$sp]], $sp, -64
751
752 ; Save variable argument portion on the stack
753 ; O32-DAG: sw $7, 20([[SP]])
754 ; O32-DAG: sw $6, 16([[SP]])
755
756 ; NEW-DAG: sd $11, 56([[SP]])
757 ; NEW-DAG: sd $10, 48([[SP]])
758 ; NEW-DAG: sd $9, 40([[SP]])
759 ; NEW-DAG: sd $8, 32([[SP]])
760 ; NEW-DAG: sd $7, 24([[SP]])
761 ; NEW-DAG: sd $6, 16([[SP]])
762 ; NEW-DAG: sd $5, 8([[SP]])
763
764 ; Initialize variable argument pointer.
765 ; For O32, the offset is 16 due to the 4 bytes used to store local variables,
766 ; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
767 ; first fixed argument.
768 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
769 ; space.
770 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
771 ; O32-DAG: sw [[VA]], 0([[SP]])
772
773 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
774 ; N32-DAG: sw [[VA]], 0([[SP]])
775
776 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
777 ; N64-DAG: sd [[VA]], 0([[SP]])
778
779 ; Store [[VA]] (repeats the O32 store already checked above)
780 ; O32-DAG: sw [[VA]], 0([[SP]])
781
782 ; ALL: teqi $zero, 1
783
784 ; Increment [[VA]]
; NOTE(review): the N64 pattern in this group spells "addiu" while the second
; increment further down spells "daddiu"; confirm which spelling is intended.
785 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
786 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
787 ; O32-DAG: sw [[VA2]], 0([[SP]])
788
789 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
790 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
791 ; N32-DAG: sw [[VA2]], 0([[SP]])
792
793 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
794 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
795 ; N64-DAG: sd [[VA2]], 0([[SP]])
796
797 ; Load the first argument from the variable portion.
798 ; The checks below expect the load to go through the [[VA]] value reloaded from
799 ; the stack slot above, not through the stack pointer itself.
800 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
801 ; order.
802 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
803
804 ; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
805 ; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
806
807 ; Copy the arg to the global
808 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
809
810 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
811
812 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
813
814 ; ALL-DAG: sh [[ARG1]], 2([[GV]])
815
816 ; ALL: teqi $zero, 2
817
818 ; Increment [[VA]] again.
819 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
820 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
821 ; O32-DAG: sw [[VA2]], 0([[SP]])
822
823 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
824 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
825 ; N32-DAG: sw [[VA3]], 0([[SP]])
826
827 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
828 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
829 ; N64-DAG: sd [[VA3]], 0([[SP]])
830
831 ; Load the second argument from the variable portion.
832 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
833
834 ; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
835 ; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
836
837 ; Copy the arg to the global
838 ; ALL-DAG: sh [[ARG2]], 4([[GV]])
839
840 %ap = alloca i8*, align 8
841 %ap2 = bitcast i8** %ap to i8*
842 call void @llvm.va_start(i8* %ap2)
843
844 call void asm sideeffect "teqi $$zero, 1", ""()
845 %arg1 = va_arg i8** %ap, i16
846 %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
847 store volatile i16 %arg1, i16* %e1, align 2
848
849 call void asm sideeffect "teqi $$zero, 2", ""()
850 %arg2 = va_arg i8** %ap, i16
851 %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
852 store volatile i16 %arg2, i16* %e2, align 2
853
854 call void @llvm.va_end(i8* %ap2)
855
856 ret void
857 }
858
; Variadic callee with one fixed i64 argument. It reads two i32 values with
; va_arg and writes each one to @words (elements 1 and 2) with a volatile
; store. The two "teqi" inline-asm statements are matched by the plain ALL
; checks that sit between the groups of -DAG checks.
859 define void @fn_i64_dotdotdot_i32(i64 %a, ...) {
860 entry:
861 ; ALL-LABEL: fn_i64_dotdotdot_i32:
862
863 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
864 ; the argument save area (56 bytes).
865 ; O32: addiu [[SP:\$sp]], $sp, -8
866 ; N32: addiu [[SP:\$sp]], $sp, -64
867 ; N64: daddiu [[SP:\$sp]], $sp, -64
868
869 ; Save variable argument portion on the stack
870 ; O32-DAG: sw $7, 20([[SP]])
871 ; O32-DAG: sw $6, 16([[SP]])
872
873 ; NEW-DAG: sd $11, 56([[SP]])
874 ; NEW-DAG: sd $10, 48([[SP]])
875 ; NEW-DAG: sd $9, 40([[SP]])
876 ; NEW-DAG: sd $8, 32([[SP]])
877 ; NEW-DAG: sd $7, 24([[SP]])
878 ; NEW-DAG: sd $6, 16([[SP]])
879 ; NEW-DAG: sd $5, 8([[SP]])
880
881 ; Initialize variable argument pointer.
882 ; For O32, the offset is 16 due to the 4 bytes used to store local variables,
883 ; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
884 ; first fixed argument.
885 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
886 ; space.
887 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
888 ; O32-DAG: sw [[VA]], 0([[SP]])
889
890 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
891 ; N32-DAG: sw [[VA]], 0([[SP]])
892
893 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
894 ; N64-DAG: sd [[VA]], 0([[SP]])
895
896 ; Store [[VA]] (repeats the O32 store already checked above)
897 ; O32-DAG: sw [[VA]], 0([[SP]])
898
899 ; ALL: teqi $zero, 1
900
901 ; Increment [[VA]]
; NOTE(review): the N64 pattern in this group spells "addiu" while the second
; increment further down spells "daddiu"; confirm which spelling is intended.
902 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
903 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
904 ; O32-DAG: sw [[VA2]], 0([[SP]])
905
906 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
907 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
908 ; N32-DAG: sw [[VA2]], 0([[SP]])
909
910 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
911 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
912 ; N64-DAG: sd [[VA2]], 0([[SP]])
913
914 ; Load the first argument from the variable portion.
915 ; The checks below expect the load to go through the [[VA]] value reloaded from
916 ; the stack slot above, not through the stack pointer itself.
917 ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
918 ; order.
919 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
920
921 ; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
922 ; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
923
924 ; Copy the arg to the global
925 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
926
927 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
928
929 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
930
931 ; ALL-DAG: sw [[ARG1]], 4([[GV]])
932
933 ; ALL: teqi $zero, 2
934
935 ; Increment [[VA]] again.
936 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
937 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
938 ; O32-DAG: sw [[VA2]], 0([[SP]])
939
940 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
941 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
942 ; N32-DAG: sw [[VA3]], 0([[SP]])
943
944 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
945 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
946 ; N64-DAG: sd [[VA3]], 0([[SP]])
947
948 ; Load the second argument from the variable portion.
949 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
950
951 ; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
952 ; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
953
954 ; Copy the arg to the global
955 ; ALL-DAG: sw [[ARG2]], 8([[GV]])
956
957 %ap = alloca i8*, align 8
958 %ap2 = bitcast i8** %ap to i8*
959 call void @llvm.va_start(i8* %ap2)
960
961 call void asm sideeffect "teqi $$zero, 1", ""()
962 %arg1 = va_arg i8** %ap, i32
963 %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
964 store volatile i32 %arg1, i32* %e1, align 4
965
966 call void asm sideeffect "teqi $$zero, 2", ""()
967 %arg2 = va_arg i8** %ap, i32
968 %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
969 store volatile i32 %arg2, i32* %e2, align 4
970
971 call void @llvm.va_end(i8* %ap2)
972
973 ret void
974 }
975
; Variadic callee with one fixed i64 argument. It reads two i64 values with
; va_arg and writes each one to @dwords (elements 1 and 2) with a volatile
; store. The two "teqi" inline-asm statements are matched by the plain ALL
; checks that sit between the groups of -DAG checks.
976 define void @fn_i64_dotdotdot_i64(i64 %a, ...) {
977 entry:
978 ; ALL-LABEL: fn_i64_dotdotdot_i64:
979
980 ; Set up the stack with an 8-byte local area. N32/N64 must also make room for
981 ; the argument save area (56 bytes).
982 ; O32: addiu [[SP:\$sp]], $sp, -8
983 ; N32: addiu [[SP:\$sp]], $sp, -64
984 ; N64: daddiu [[SP:\$sp]], $sp, -64
985
986 ; Save variable argument portion on the stack
987 ; O32-DAG: sw $7, 20([[SP]])
988 ; O32-DAG: sw $6, 16([[SP]])
989
990 ; NEW-DAG: sd $11, 56([[SP]])
991 ; NEW-DAG: sd $10, 48([[SP]])
992 ; NEW-DAG: sd $9, 40([[SP]])
993 ; NEW-DAG: sd $8, 32([[SP]])
994 ; NEW-DAG: sd $7, 24([[SP]])
995 ; NEW-DAG: sd $6, 16([[SP]])
996 ; NEW-DAG: sd $5, 8([[SP]])
997
998 ; Initialize variable argument pointer.
999 ; For O32, the offset is 16 due to the 4 bytes used to store local variables,
1000 ; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
1001 ; first fixed argument.
1002 ; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
1003 ; space.
1004 ; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
1005 ; O32-DAG: sw [[VA]], 0([[SP]])
1006
1007 ; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
1008 ; N32-DAG: sw [[VA]], 0([[SP]])
1009
1010 ; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
1011 ; N64-DAG: sd [[VA]], 0([[SP]])
1012
1013 ; Store [[VA]] (repeats the O32 store already checked above)
1014 ; O32-DAG: sw [[VA]], 0([[SP]])
1015
1016 ; ALL: teqi $zero, 1
1017
1018 ; Increment [[VA]] (and realign pointer for O32)
; NOTE(review): the N64 pattern in this group spells "addiu" while the second
; increment further down spells "daddiu"; confirm which spelling is intended.
1019 ; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
1020 ; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
1021 ; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
1022 ; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
1023 ; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
1024 ; O32-DAG: sw [[VA2]], 0([[SP]])
1025
1026 ; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
1027 ; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
1028 ; N32-DAG: sw [[VA2]], 0([[SP]])
1029
1030 ; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
1031 ; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
1032 ; N64-DAG: sd [[VA2]], 0([[SP]])
1033
1034 ; Load the first argument from the variable portion and copy it to the global.
1035 ; The checks below expect the loads to go through [[VA]], not the stack pointer.
1036 ; The O32 sequence copies the value as two words and bumps the saved pointer by
1037 ; 4 in between; the N32/N64 sequence is a single ld/sd pair at offset 0 with no
1038 ; big-endian adjustment.
1039 ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
1040 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
1041 ; O32-DAG: sw [[ARG1]], 8([[GV]])
1042 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
1043 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
1044 ; O32-DAG: sw [[VA2]], 0([[SP]])
1045 ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
1046 ; O32-DAG: sw [[ARG1]], 12([[GV]])
1047
1048 ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
1049 ; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
1050 ; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
1051 ; NEW-DAG: sd [[ARG1]], 8([[GV]])
1052
1053 ; ALL: teqi $zero, 2
1054
1055 ; Increment [[VA]] again.
1056 ; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
1057 ; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
1058 ; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
1059 ; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
1060 ; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
1061 ; O32-DAG: sw [[VA2]], 0([[SP]])
1062
1063 ; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
1064 ; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
1065 ; N32-DAG: sw [[VA3]], 0([[SP]])
1066
1067 ; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
1068 ; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
1069 ; N64-DAG: sd [[VA3]], 0([[SP]])
1070
1071 ; Load the second argument from the variable portion and copy it to the global.
1072 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
1073 ; O32-DAG: sw [[ARG2]], 16([[GV]])
1074 ; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
1075 ; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
1076 ; O32-DAG: sw [[VA2]], 0([[SP]])
1077 ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
1078 ; O32-DAG: sw [[ARG2]], 20([[GV]])
1079
1080 ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
1081 ; NEW-DAG: sd [[ARG2]], 16([[GV]])
1082
1083 %ap = alloca i8*, align 8
1084 %ap2 = bitcast i8** %ap to i8*
1085 call void @llvm.va_start(i8* %ap2)
1086
1087 call void asm sideeffect "teqi $$zero, 1", ""()
1088 %arg1 = va_arg i8** %ap, i64
1089 %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
1090 store volatile i64 %arg1, i64* %e1, align 8
1091
1092 call void asm sideeffect "teqi $$zero, 2", ""()
1093 %arg2 = va_arg i8** %ap, i64
1094 %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
1095 store volatile i64 %arg2, i64* %e2, align 8
1096
1097 call void @llvm.va_end(i8* %ap2)
1098
1099 ret void
1100 }
1101
10961102 declare void @llvm.va_start(i8*)
10971103 declare void @llvm.va_end(i8*)
131131 define internal fastcc void @callee0(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) nounwind noinline {
132132 entry:
133133 ; CHECK: callee0
134 ; CHECK-DAG: sw $4
135 ; CHECK-DAG: sw $5
136 ; CHECK-DAG: sw $7
137 ; CHECK-DAG: sw $8
138 ; CHECK-DAG: sw $9
139 ; CHECK-DAG: sw $10
140 ; CHECK-DAG: sw $11
141 ; CHECK-DAG: sw $12
142 ; CHECK-DAG: sw $13
143 ; CHECK-DAG: sw $14
144 ; CHECK-DAG: sw $15
145 ; CHECK-DAG: sw $24
146 ; CHECK-DAG: sw $3
134 ; CHECK: sw $4
135 ; CHECK: sw $5
136 ; CHECK: sw $6
137 ; CHECK: sw $7
138 ; CHECK: sw $8
139 ; CHECK: sw $9
140 ; CHECK: sw $10
141 ; CHECK: sw $11
142 ; CHECK: sw $12
143 ; CHECK: sw $13
144 ; CHECK: sw $14
145 ; CHECK: sw $15
146 ; CHECK: sw $24
147 ; CHECK: sw $3
147148
148149 ; t6, t7 and t8 are reserved in NaCl and cannot be used for fastcc.
149150 ; CHECK-NACL-NOT: sw $14
221222
222223 define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline {
223224 entry:
224 ; CHECK-LABEL: callee1:
225 ; CHECK-DAG: swc1 $f0
226 ; CHECK-DAG: swc1 $f1
227 ; CHECK-DAG: swc1 $f2
228 ; CHECK-DAG: swc1 $f3
229 ; CHECK-DAG: swc1 $f4
230 ; CHECK-DAG: swc1 $f5
231 ; CHECK-DAG: swc1 $f6
232 ; CHECK-DAG: swc1 $f7
233 ; CHECK-DAG: swc1 $f8
234 ; CHECK-DAG: swc1 $f9
235 ; CHECK-DAG: swc1 $f10
236 ; CHECK-DAG: swc1 $f11
237 ; CHECK-DAG: swc1 $f12
238 ; CHECK-DAG: swc1 $f13
239 ; CHECK-DAG: swc1 $f14
240 ; CHECK-DAG: swc1 $f15
241 ; CHECK-DAG: swc1 $f16
242 ; CHECK-DAG: swc1 $f17
243 ; CHECK-DAG: swc1 $f18
244 ; CHECK-DAG: swc1 $f19
225 ; CHECK: callee1
226 ; CHECK: swc1 $f0
227 ; CHECK: swc1 $f1
228 ; CHECK: swc1 $f2
229 ; CHECK: swc1 $f3
230 ; CHECK: swc1 $f4
231 ; CHECK: swc1 $f5
232 ; CHECK: swc1 $f6
233 ; CHECK: swc1 $f7
234 ; CHECK: swc1 $f8
235 ; CHECK: swc1 $f9
236 ; CHECK: swc1 $f10
237 ; CHECK: swc1 $f11
238 ; CHECK: swc1 $f12
239 ; CHECK: swc1 $f13
240 ; CHECK: swc1 $f14
241 ; CHECK: swc1 $f15
242 ; CHECK: swc1 $f16
243 ; CHECK: swc1 $f17
244 ; CHECK: swc1 $f18
245 ; CHECK: swc1 $f19
245246
246247 store float %a0, float* @gf0, align 4
247248 store float %a1, float* @gf1, align 4
314315
315316 ; NOODDSPREG-LABEL: callee2:
316317
318 ; NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]]
319
317320 ; Check that first 10 arguments are received in even float registers
318321 ; f0, f2, ... , f18. Check that 11th argument is received on stack.
319322
329332 ; NOODDSPREG-DAG: swc1 $f16, 32($[[R0]])
330333 ; NOODDSPREG-DAG: swc1 $f18, 36($[[R0]])
331334
332 ; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 0($sp)
335 ; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
333336 ; NOODDSPREG-DAG: swc1 $[[F0]], 40($[[R0]])
334337
335338 store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4
393396
394397 ; FP64-NOODDSPREG-LABEL: callee3:
395398
399 ; FP64-NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]]
396400
397401 ; Check that first 10 arguments are received in even float registers
398402 ; f0, f2, ... , f18. Check that 11th argument is received on stack.
409413 ; FP64-NOODDSPREG-DAG: sdc1 $f16, 64($[[R0]])
410414 ; FP64-NOODDSPREG-DAG: sdc1 $f18, 72($[[R0]])
411415
412 ; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], 0($sp)
416 ; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
413417 ; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 80($[[R0]])
414418
415419 store double %a0, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 0), align 8
249249 ; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
250250 ; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
251251
252 ; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
253 ; MIPS32-DAG: sb $[[R1]], 2($[[PTR]])
254 ; MIPS32-DAG: lbu $[[R2:[0-9]+]], 1($[[PTR]])
255 ; MIPS32-DAG: sb $[[R2]], 3($[[PTR]])
256
257 ; MIPS32R6: lhu $[[R1:[0-9]+]], 0($[[PTR]])
258 ; MIPS32R6: sh $[[R1]], 2($[[PTR]])
259
260 ; MIPS64-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
261 ; MIPS64-DAG: sb $[[R1]], 2($[[PTR]])
262 ; MIPS64-DAG: lbu $[[R2:[0-9]+]], 1($[[PTR]])
263 ; MIPS64-DAG: sb $[[R2]], 3($[[PTR]])
252 ; FIXME: We should be able to do better than this on MIPS32r6/MIPS64r6 since
253 ; we have unaligned halfword load/store available
254 ; ALL-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
255 ; ALL-DAG: sb $[[R1]], 2($[[PTR]])
256 ; ALL-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
257 ; ALL-DAG: sb $[[R1]], 3($[[PTR]])
264258
265259 %0 = load %struct.S0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 0), align 1
266260 store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 1), align 1
273267
274268 ; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s1)(
275269 ; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s1)(
276 ; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
277 ; MIPS32-EL-DAG: lwr $[[R1]], 0($[[PTR]])
278 ; MIPS32-EL-DAG: swl $[[R1]], 7($[[PTR]])
279 ; MIPS32-EL-DAG: swr $[[R1]], 4($[[PTR]])
280 ; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
281 ; MIPS32-EB-DAG: lwr $[[R1]], 3($[[PTR]])
282 ; MIPS32-EB-DAG: swl $[[R1]], 4($[[PTR]])
283 ; MIPS32-EB-DAG: swr $[[R1]], 7($[[PTR]])
284
285 ; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
286 ; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 4($[[PTR]])
287 ; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
288 ; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 5($[[PTR]])
289 ; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
290 ; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 6($[[PTR]])
291 ; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
292 ; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 7($[[PTR]])
270 ; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
271 ; MIPS32-DAG: sb $[[R1]], 4($[[PTR]])
272 ; MIPS32-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
273 ; MIPS32-DAG: sb $[[R1]], 5($[[PTR]])
274 ; MIPS32-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
275 ; MIPS32-DAG: sb $[[R1]], 6($[[PTR]])
276 ; MIPS32-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
277 ; MIPS32-DAG: sb $[[R1]], 7($[[PTR]])
293278
294279 ; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s1)(
295 ; MIPS32R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]])
296 ; MIPS32R6-DAG: sw $[[R1]], 4($[[PTR]])
280 ; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]])
281 ; MIPS32R6-DAG: sh $[[R1]], 4($[[PTR]])
282 ; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]])
283 ; MIPS32R6-DAG: sh $[[R1]], 6($[[PTR]])
297284
298285 ; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
299286 ; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
300
301 ; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
302 ; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]])
303 ; MIPS64-EL-DAG: swl $[[R1]], 7($[[PTR]])
304 ; MIPS64-EL-DAG: swr $[[R1]], 4($[[PTR]])
305
306 ; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
307 ; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]])
308 ; MIPS64-EB-DAG: swl $[[R1]], 4($[[PTR]])
309 ; MIPS64-EB-DAG: swr $[[R1]], 7($[[PTR]])
310
311
312 ; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
313 ; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 4($[[PTR]])
314 ; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
315 ; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 5($[[PTR]])
316 ; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
317 ; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 6($[[PTR]])
318 ; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
319 ; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 7($[[PTR]])
287 ; MIPS64-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
288 ; MIPS64-DAG: sb $[[R1]], 4($[[PTR]])
289 ; MIPS64-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
290 ; MIPS64-DAG: sb $[[R1]], 5($[[PTR]])
291 ; MIPS64-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
292 ; MIPS64-DAG: sb $[[R1]], 6($[[PTR]])
293 ; MIPS64-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
294 ; MIPS64-DAG: sb $[[R1]], 7($[[PTR]])
320295
321296 ; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
322 ; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]])
323 ; MIPS64R6-DAG: sw $[[R1]], 4($[[PTR]])
297 ; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]])
298 ; MIPS64R6-DAG: sh $[[R1]], 4($[[PTR]])
299 ; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]])
300 ; MIPS64R6-DAG: sh $[[R1]], 6($[[PTR]])
324301
325