llvm.org GIT mirror llvm / 892af54
ARMLoadStoreOptimizer: Rewrite LDM/STM matching logic. This improves the logic in several ways and is a preparation for followup patches: - First perform an analysis and create a list of merge candidates, then transform. This simplifies the code in that you don't have to care too much anymore that you may be holding iterators to MachineInstrs that get removed. - Analyze/Transform basic blocks in reverse order. This allows using LivePhysRegs to find free registers instead of the RegisterScavenger. The RegisterScavenger will become less precise in the future as it relies on the deprecated kill-flags. - Return the newly created node in MergeOps so there's no need to look around in the schedule to find it. - Rename some MBBI iterators to InsertBefore to make their role clear. - General code cleanup. Differential Revision: http://reviews.llvm.org/D10140 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241920 91177308-0d34-0410-b5e6-96231b3b80d8 Matthias Braun 5 years ago
2 changed file(s) with 793 addition(s) and 863 deletion(s). Raw diff Collapse all Expand all
3030 #include "llvm/CodeGen/MachineInstr.h"
3131 #include "llvm/CodeGen/MachineInstrBuilder.h"
3232 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/RegisterScavenging.h"
33 #include "llvm/CodeGen/RegisterClassInfo.h"
3434 #include "llvm/CodeGen/SelectionDAGNodes.h"
35 #include "llvm/CodeGen/LivePhysRegs.h"
3536 #include "llvm/IR/DataLayout.h"
3637 #include "llvm/IR/DerivedTypes.h"
3738 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Allocator.h"
3840 #include "llvm/Support/Debug.h"
3941 #include "llvm/Support/ErrorHandling.h"
4042 #include "llvm/Support/raw_ostream.h"
6466 static char ID;
6567 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
6668
69 const MachineFunction *MF;
6770 const TargetInstrInfo *TII;
6871 const TargetRegisterInfo *TRI;
72 const MachineRegisterInfo *MRI;
6973 const ARMSubtarget *STI;
7074 const TargetLowering *TL;
7175 ARMFunctionInfo *AFI;
72 RegScavenger *RS;
76 LivePhysRegs LiveRegs;
77 RegisterClassInfo RegClassInfo;
78 MachineBasicBlock::const_iterator LiveRegPos;
79 bool LiveRegsValid;
80 bool RegClassInfoValid;
7381 bool isThumb1, isThumb2;
7482
7583 bool runOnMachineFunction(MachineFunction &Fn) override;
7987 }
8088
8189 private:
90 /// A set of load/store MachineInstrs with same base register sorted by
91 /// offset.
8292 struct MemOpQueueEntry {
83 int Offset;
84 unsigned Reg;
85 bool isKill;
86 unsigned Position;
87 MachineBasicBlock::iterator MBBI;
88 bool Merged;
89 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
90 MachineBasicBlock::iterator i)
91 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
93 MachineInstr *MI;
94 int Offset; ///< Load/Store offset.
95 unsigned Position; ///< Position as counted from end of basic block.
96 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
97 : MI(MI), Offset(Offset), Position(Position) {}
9298 };
9399 typedef SmallVector MemOpQueue;
94 typedef MemOpQueue::iterator MemOpQueueIter;
95
96 void findUsesOfImpDef(SmallVectorImpl &UsesOfImpDefs,
97 const MemOpQueue &MemOps, unsigned DefReg,
98 unsigned RangeBegin, unsigned RangeEnd);
100
101 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
102 /// merged into a LDM/STM.
103 struct MergeCandidate {
104 /// List of instructions ordered by load/store offset.
105 SmallVector Instrs;
106 /// Index in Instrs of the instruction being latest in the schedule.
107 unsigned LatestMIIdx;
108 /// Index in Instrs of the instruction being earliest in the schedule.
109 unsigned EarliestMIIdx;
110 /// Index into the basic block where the merged instruction will be
111 /// inserted. (See MemOpQueueEntry.Position)
112 unsigned InsertPos;
113 };
114 BumpPtrAllocator Allocator;
115 SmallVector Candidates;
116
117 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
118 MachineBasicBlock::const_iterator Before);
119 unsigned findFreeReg(const TargetRegisterClass &RegClass);
99120 void UpdateBaseRegUses(MachineBasicBlock &MBB,
100121 MachineBasicBlock::iterator MBBI,
101 DebugLoc dl, unsigned Base, unsigned WordOffset,
122 DebugLoc DL, unsigned Base, unsigned WordOffset,
102123 ARMCC::CondCodes Pred, unsigned PredReg);
103 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
104 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
105 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
106 DebugLoc dl,
107 ArrayRef > Regs,
108 ArrayRef ImpDefs);
109 void MergeOpsUpdate(MachineBasicBlock &MBB,
110 MemOpQueue &MemOps,
111 unsigned memOpsBegin,
112 unsigned memOpsEnd,
113 unsigned insertAfter,
114 int Offset,
115 unsigned Base,
116 bool BaseKill,
117 unsigned Opcode,
118 ARMCC::CondCodes Pred,
119 unsigned PredReg,
120 unsigned Scratch,
121 DebugLoc dl,
122 SmallVectorImpl &Merges);
123 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
124 unsigned Opcode, unsigned Size,
125 ARMCC::CondCodes Pred, unsigned PredReg,
126 unsigned Scratch, MemOpQueue &MemOps,
127 SmallVectorImpl &Merges);
128 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
124 MachineInstr *MergeOps(MachineBasicBlock &MBB,
125 MachineBasicBlock::iterator InsertBefore, int Offset,
126 unsigned Base, bool BaseKill, unsigned Opcode,
127 ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
128 ArrayRef> Regs);
129 void FormCandidates(const MemOpQueue &MemOps);
130 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
129131 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
130132 MachineBasicBlock::iterator &MBBI);
131 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
132 MachineBasicBlock::iterator MBBI,
133 const TargetInstrInfo *TII,
134 bool &Advance,
135 MachineBasicBlock::iterator &I);
136 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
137 MachineBasicBlock::iterator MBBI,
138 bool &Advance,
139 MachineBasicBlock::iterator &I);
133 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
134 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
140135 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
141136 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
142137 };
182177 return -Offset;
183178
184179 return Offset;
180 }
181
182 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
183 return MI.getOperand(1);
184 }
185
186 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
187 return MI.getOperand(0);
185188 }
186189
187190 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
347350 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
348351 }
349352
353 static bool isLoadSingle(unsigned Opc) {
354 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
355 }
356
350357 static unsigned getImmScale(unsigned Opc) {
351358 switch (Opc) {
352359 default: llvm_unreachable("Unhandled opcode!");
364371 }
365372 }
366373
367 /// Update future uses of the base register with the offset introduced
368 /// due to writeback. This function only works on Thumb1.
369 void
370 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
371 MachineBasicBlock::iterator MBBI,
372 DebugLoc dl, unsigned Base,
373 unsigned WordOffset,
374 ARMCC::CondCodes Pred, unsigned PredReg) {
375 assert(isThumb1 && "Can only update base register uses for Thumb1!");
376 // Start updating any instructions with immediate offsets. Insert a SUB before
377 // the first non-updateable instruction (if any).
378 for (; MBBI != MBB.end(); ++MBBI) {
379 bool InsertSub = false;
380 unsigned Opc = MBBI->getOpcode();
381
382 if (MBBI->readsRegister(Base)) {
383 int Offset;
384 bool IsLoad =
385 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
386 bool IsStore =
387 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
388
389 if (IsLoad || IsStore) {
390 // Loads and stores with immediate offsets can be updated, but only if
391 // the new offset isn't negative.
392 // The MachineOperand containing the offset immediate is the last one
393 // before predicates.
394 MachineOperand &MO =
395 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
396 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
397 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
398
399 // If storing the base register, it needs to be reset first.
400 unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
401
402 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
403 MO.setImm(Offset);
404 else
405 InsertSub = true;
406
407 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
408 !definesCPSR(MBBI)) {
409 // SUBS/ADDS using this register, with a dead def of the CPSR.
410 // Merge it with the update; if the merged offset is too large,
411 // insert a new sub instead.
412 MachineOperand &MO =
413 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
414 Offset = (Opc == ARM::tSUBi8) ?
415 MO.getImm() + WordOffset * 4 :
416 MO.getImm() - WordOffset * 4 ;
417 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
418 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
419 // Offset == 0.
420 MO.setImm(Offset);
421 // The base register has now been reset, so exit early.
422 return;
423 } else {
424 InsertSub = true;
425 }
426
427 } else {
428 // Can't update the instruction.
429 InsertSub = true;
430 }
431
432 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
433 // Since SUBS sets the condition flags, we can't place the base reset
434 // after an instruction that has a live CPSR def.
435 // The base register might also contain an argument for a function call.
436 InsertSub = true;
437 }
438
439 if (InsertSub) {
440 // An instruction above couldn't be updated, so insert a sub.
441 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
442 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
443 return;
444 }
445
446 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
447 // Register got killed. Stop updating.
448 return;
449 }
450
451 // End of block was reached.
452 if (MBB.succ_size() > 0) {
453 // FIXME: Because of a bug, live registers are sometimes missing from
454 // the successor blocks' live-in sets. This means we can't trust that
455 // information and *always* have to reset at the end of a block.
456 // See PR21029.
457 if (MBBI != MBB.end()) --MBBI;
458 AddDefaultT1CC(
459 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
460 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
461 }
462 }
463
464 /// Create and insert a LDM or STM with Base as base register and registers in
465 /// Regs as the register operands that would be loaded / stored. It returns
466 /// true if the transformation is done.
467 bool
468 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
469 MachineBasicBlock::iterator MBBI,
470 int Offset, unsigned Base, bool BaseKill,
471 unsigned Opcode, ARMCC::CondCodes Pred,
472 unsigned PredReg, unsigned Scratch, DebugLoc dl,
473 ArrayRef > Regs,
474 ArrayRef ImpDefs) {
475 // Only a single register to load / store. Don't bother.
476 unsigned NumRegs = Regs.size();
477 if (NumRegs <= 1)
478 return false;
479
480 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
481 // Compute liveness information for that register to make the decision.
482 bool SafeToClobberCPSR = !isThumb1 ||
483 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
484 MachineBasicBlock::LQR_Dead);
485
486 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
487
488 // Exception: If the base register is in the input reglist, Thumb1 LDM is
489 // non-writeback.
490 // It's also not possible to merge an STR of the base register in Thumb1.
491 if (isThumb1)
492 for (const std::pair &R : Regs)
493 if (Base == R.first) {
494 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
495 if (Opcode == ARM::tLDRi) {
496 Writeback = false;
497 break;
498 } else if (Opcode == ARM::tSTRi) {
499 return false;
500 }
501 }
502
503 ARM_AM::AMSubMode Mode = ARM_AM::ia;
504 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
505 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
506 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
507
508 if (Offset == 4 && haveIBAndDA) {
509 Mode = ARM_AM::ib;
510 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
511 Mode = ARM_AM::da;
512 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
513 // VLDM/VSTM do not support DB mode without also updating the base reg.
514 Mode = ARM_AM::db;
515 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
516 // Check if this is a supported opcode before inserting instructions to
517 // calculate a new base register.
518 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
519
520 // If starting offset isn't zero, insert a MI to materialize a new base.
521 // But only do so if it is cost effective, i.e. merging more than two
522 // loads / stores.
523 if (NumRegs <= 2)
524 return false;
525
526 // On Thumb1, it's not worth materializing a new base register without
527 // clobbering the CPSR (i.e. not using ADDS/SUBS).
528 if (!SafeToClobberCPSR)
529 return false;
530
531 unsigned NewBase;
532 if (isi32Load(Opcode)) {
533 // If it is a load, then just use one of the destination register to
534 // use as the new base.
535 NewBase = Regs[NumRegs-1].first;
536 } else {
537 // Use the scratch register to use as a new base.
538 NewBase = Scratch;
539 if (NewBase == 0)
540 return false;
541 }
542
543 int BaseOpc =
544 isThumb2 ? ARM::t2ADDri :
545 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
546 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
547 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
548
549 if (Offset < 0) {
550 Offset = - Offset;
551 BaseOpc =
552 isThumb2 ? ARM::t2SUBri :
553 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
554 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
555 }
556
557 if (!TL->isLegalAddImmediate(Offset))
558 // FIXME: Try add with register operand?
559 return false; // Probably not worth it then.
560
561 if (isThumb1) {
562 // Thumb1: depending on immediate size, use either
563 // ADDS NewBase, Base, #imm3
564 // or
565 // MOV NewBase, Base
566 // ADDS NewBase, #imm8.
567 if (Base != NewBase &&
568 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
569 // Need to insert a MOV to the new base first.
570 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
571 !STI->hasV6Ops()) {
572 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
573 if (Pred != ARMCC::AL)
574 return false;
575 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
576 .addReg(Base, getKillRegState(BaseKill));
577 } else
578 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
579 .addReg(Base, getKillRegState(BaseKill))
580 .addImm(Pred).addReg(PredReg);
581
582 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
583 Base = NewBase;
584 BaseKill = false;
585 }
586 if (BaseOpc == ARM::tADDrSPi) {
587 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
588 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
589 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
590 .addImm(Pred).addReg(PredReg);
591 } else
592 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
593 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
594 .addImm(Pred).addReg(PredReg);
595 } else {
596 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
597 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
598 .addImm(Pred).addReg(PredReg).addReg(0);
599 }
600 Base = NewBase;
601 BaseKill = true; // New base is always killed straight away.
602 }
603
604 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
605 Opcode == ARM::VLDRD);
606
607 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
608 // base register writeback.
609 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
610 if (!Opcode) return false;
611
612 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
613 // - There is no writeback (LDM of base register),
614 // - the base register is killed by the merged instruction,
615 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
616 // to reset the base register.
617 // Otherwise, don't merge.
618 // It's safe to return here since the code to materialize a new base register
619 // above is also conditional on SafeToClobberCPSR.
620 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
621 return false;
622
623 MachineInstrBuilder MIB;
624
625 if (Writeback) {
626 if (Opcode == ARM::tLDMIA)
627 // Update tLDMIA with writeback if necessary.
628 Opcode = ARM::tLDMIA_UPD;
629
630 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
631
632 // Thumb1: we might need to set base writeback when building the MI.
633 MIB.addReg(Base, getDefRegState(true))
634 .addReg(Base, getKillRegState(BaseKill));
635
636 // The base isn't dead after a merged instruction with writeback.
637 // Insert a sub instruction after the newly formed instruction to reset.
638 if (!BaseKill)
639 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
640
641 } else {
642 // No writeback, simply build the MachineInstr.
643 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
644 MIB.addReg(Base, getKillRegState(BaseKill));
645 }
646
647 MIB.addImm(Pred).addReg(PredReg);
648
649 for (const std::pair &R : Regs)
650 MIB = MIB.addReg(R.first, getDefRegState(isDef)
651 | getKillRegState(R.second));
652
653 // Add implicit defs for super-registers.
654 for (unsigned ImpDef : ImpDefs)
655 MIB.addReg(ImpDef, RegState::ImplicitDefine);
656
657 return true;
658 }
659
660 /// Find all instructions using a given imp-def within a range.
661 ///
662 /// We are trying to combine a range of instructions, one of which (located at
663 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
664 /// be placed at RangeEnd, and so any uses of this definition between RangeStart
665 /// and RangeEnd must be modified to use an undefined value.
666 ///
667 /// The live range continues until we find a second definition or one of the
668 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
669 /// we must consider all uses and decide which are relevant in a second pass.
670 void ARMLoadStoreOpt::findUsesOfImpDef(
671 SmallVectorImpl &UsesOfImpDefs, const MemOpQueue &MemOps,
672 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
673 std::map Uses;
674 unsigned LastLivePos = RangeEnd;
675
676 // First we find all uses of this register with Position between RangeBegin
677 // and RangeEnd, any or all of these could be uses of a definition at
678 // RangeBegin. We also record the latest position a definition at RangeBegin
679 // would be considered live.
680 for (unsigned i = 0; i < MemOps.size(); ++i) {
681 MachineInstr &MI = *MemOps[i].MBBI;
682 unsigned MIPosition = MemOps[i].Position;
683 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
684 continue;
685
686 // If this instruction defines the register, then any later use will be of
687 // that definition rather than ours.
688 if (MI.definesRegister(DefReg))
689 LastLivePos = std::min(LastLivePos, MIPosition);
690
691 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
692 if (!UseOp)
693 continue;
694
695 // If this instruction kills the register then (assuming liveness is
696 // correct when we start) we don't need to think about anything after here.
697 if (UseOp->isKill())
698 LastLivePos = std::min(LastLivePos, MIPosition);
699
700 Uses[MIPosition] = UseOp;
701 }
702
703 // Now we traverse the list of all uses, and append the ones that actually use
704 // our definition to the requested list.
705 for (std::map::iterator I = Uses.begin(),
706 E = Uses.end();
707 I != E; ++I) {
708 // List is sorted by position so once we've found one out of range there
709 // will be no more to consider.
710 if (I->first > LastLivePos)
711 break;
712 UsesOfImpDefs.push_back(I->second);
713 }
714 }
715
716 /// Call MergeOps and update MemOps and merges accordingly on success.
717 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
718 MemOpQueue &memOps,
719 unsigned memOpsBegin, unsigned memOpsEnd,
720 unsigned insertAfter, int Offset,
721 unsigned Base, bool BaseKill,
722 unsigned Opcode,
723 ARMCC::CondCodes Pred, unsigned PredReg,
724 unsigned Scratch,
725 DebugLoc dl,
726 SmallVectorImpl &Merges) {
727 // First calculate which of the registers should be killed by the merged
728 // instruction.
729 const unsigned insertPos = memOps[insertAfter].Position;
730 SmallSet KilledRegs;
731 DenseMap Killer;
732 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
733 if (i == memOpsBegin) {
734 i = memOpsEnd;
735 if (i == e)
736 break;
737 }
738 if (memOps[i].Position < insertPos && memOps[i].isKill) {
739 unsigned Reg = memOps[i].Reg;
740 KilledRegs.insert(Reg);
741 Killer[Reg] = i;
742 }
743 }
744
745 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
746 MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0);
747 if (TransferOp.isUse() && TransferOp.getReg() == Base)
748 BaseKill = false;
749 }
750
751 SmallVector, 8> Regs;
752 SmallVector ImpDefs;
753 SmallVector UsesOfImpDefs;
754 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
755 unsigned Reg = memOps[i].Reg;
756 // If we are inserting the merged operation after an operation that
757 // uses the same register, make sure to transfer any kill flag.
758 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
759 Regs.push_back(std::make_pair(Reg, isKill));
760
761 // Collect any implicit defs of super-registers. They must be preserved.
762 for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
763 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
764 continue;
765 unsigned DefReg = MO.getReg();
766 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
767 ImpDefs.push_back(DefReg);
768
769 // There may be other uses of the definition between this instruction and
770 // the eventual LDM/STM position. These should be marked undef if the
771 // merge takes place.
772 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
773 insertPos);
774 }
775 }
776
777 // Try to do the merge.
778 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
779 ++Loc;
780 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
781 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
782 return;
783
784 // Merge succeeded, update records.
785 Merges.push_back(std::prev(Loc));
786
787 // In gathering loads together, we may have moved the imp-def of a register
788 // past one of its uses. This is OK, since we know better than the rest of
789 // LLVM what's OK with ARM loads and stores; but we still have to adjust the
790 // affected uses.
791 for (SmallVectorImpl::iterator I = UsesOfImpDefs.begin(),
792 E = UsesOfImpDefs.end();
793 I != E; ++I)
794 (*I)->setIsUndef();
795
796 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
797 // Remove kill flags from any memops that come before insertPos.
798 if (Regs[i-memOpsBegin].second) {
799 unsigned Reg = Regs[i-memOpsBegin].first;
800 if (KilledRegs.count(Reg)) {
801 unsigned j = Killer[Reg];
802 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
803 assert(Idx >= 0 && "Cannot find killing operand");
804 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
805 memOps[j].isKill = false;
806 }
807 memOps[i].isKill = true;
808 }
809 MBB.erase(memOps[i].MBBI);
810 // Update this memop to refer to the merged instruction.
811 // We may need to move kill flags again.
812 memOps[i].Merged = true;
813 memOps[i].MBBI = Merges.back();
814 memOps[i].Position = insertPos;
815 }
816
817 // Update memOps offsets, since they may have been modified by MergeOps.
818 for (auto &MemOp : memOps) {
819 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
820 }
821 }
822
823 /// Merge a number of load / store instructions into one or more load / store
824 /// multiple instructions.
825 void
826 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
827 unsigned Base, unsigned Opcode, unsigned Size,
828 ARMCC::CondCodes Pred, unsigned PredReg,
829 unsigned Scratch, MemOpQueue &MemOps,
830 SmallVectorImpl &Merges) {
831 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
832 int Offset = MemOps[SIndex].Offset;
833 int SOffset = Offset;
834 unsigned insertAfter = SIndex;
835 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
836 DebugLoc dl = Loc->getDebugLoc();
837 const MachineOperand &PMO = Loc->getOperand(0);
838 unsigned PReg = PMO.getReg();
839 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
840 unsigned Count = 1;
841 unsigned Limit = ~0U;
842 bool BaseKill = false;
843 // vldm / vstm limit are 32 for S variants, 16 for D variants.
844
845 switch (Opcode) {
846 default: break;
847 case ARM::VSTRS:
848 Limit = 32;
849 break;
850 case ARM::VSTRD:
851 Limit = 16;
852 break;
853 case ARM::VLDRD:
854 Limit = 16;
855 break;
856 case ARM::VLDRS:
857 Limit = 32;
858 break;
859 }
860
861 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
862 int NewOffset = MemOps[i].Offset;
863 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
864 unsigned Reg = MO.getReg();
865 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
866 // Register numbers must be in ascending order. For VFP / NEON load and
867 // store multiples, the registers must also be consecutive and within the
868 // limit on the number of registers per instruction.
869 if (Reg != ARM::SP &&
870 NewOffset == Offset + (int)Size &&
871 ((isNotVFP && RegNum > PRegNum) ||
872 ((Count < Limit) && RegNum == PRegNum+1)) &&
873 // On Swift we don't want vldm/vstm to start with a odd register num
874 // because Q register unaligned vldm/vstm need more uops.
875 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
876 Offset += Size;
877 PRegNum = RegNum;
878 ++Count;
879 } else {
880 // Can't merge this in. Try merge the earlier ones first.
881 // We need to compute BaseKill here because the MemOps may have been
882 // reordered.
883 BaseKill = Loc->killsRegister(Base);
884
885 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
886 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
887 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
888 MemOps, Merges);
889 return;
890 }
891
892 if (MemOps[i].Position > MemOps[insertAfter].Position) {
893 insertAfter = i;
894 Loc = MemOps[i].MBBI;
895 }
896 }
897
898 BaseKill = Loc->killsRegister(Base);
899 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
900 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
901 }
902
903 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
904 unsigned Bytes, unsigned Limit,
905 ARMCC::CondCodes Pred, unsigned PredReg) {
906 unsigned MyPredReg = 0;
907 if (!MI)
908 return false;
909
910 bool CheckCPSRDef = false;
911 switch (MI->getOpcode()) {
912 default: return false;
913 case ARM::tSUBi8:
914 case ARM::t2SUBri:
915 case ARM::SUBri:
916 CheckCPSRDef = true;
917 break;
918 case ARM::tSUBspi:
919 break;
920 }
921
922 // Make sure the offset fits in 8 bits.
923 if (Bytes == 0 || (Limit && Bytes >= Limit))
924 return false;
925
926 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
927 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
928 if (!(MI->getOperand(0).getReg() == Base &&
929 MI->getOperand(1).getReg() == Base &&
930 (MI->getOperand(2).getImm() * Scale) == Bytes &&
931 getInstrPredicate(MI, MyPredReg) == Pred &&
932 MyPredReg == PredReg))
933 return false;
934
935 return CheckCPSRDef ? !definesCPSR(MI) : true;
936 }
937
938 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
939 unsigned Bytes, unsigned Limit,
940 ARMCC::CondCodes Pred, unsigned PredReg) {
941 unsigned MyPredReg = 0;
942 if (!MI)
943 return false;
944
945 bool CheckCPSRDef = false;
946 switch (MI->getOpcode()) {
947 default: return false;
948 case ARM::tADDi8:
949 case ARM::t2ADDri:
950 case ARM::ADDri:
951 CheckCPSRDef = true;
952 break;
953 case ARM::tADDspi:
954 break;
955 }
956
957 if (Bytes == 0 || (Limit && Bytes >= Limit))
958 // Make sure the offset fits in 8 bits.
959 return false;
960
961 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
962 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
963 if (!(MI->getOperand(0).getReg() == Base &&
964 MI->getOperand(1).getReg() == Base &&
965 (MI->getOperand(2).getImm() * Scale) == Bytes &&
966 getInstrPredicate(MI, MyPredReg) == Pred &&
967 MyPredReg == PredReg))
968 return false;
969
970 return CheckCPSRDef ? !definesCPSR(MI) : true;
971 }
972
973 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
374 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
974375 switch (MI->getOpcode()) {
975376 default: return 0;
976377 case ARM::LDRi12:
1013414 }
1014415 }
1015416
417 /// Update future uses of the base register with the offset introduced
418 /// due to writeback. This function only works on Thumb1.
419 void
420 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
421 MachineBasicBlock::iterator MBBI,
422 DebugLoc DL, unsigned Base,
423 unsigned WordOffset,
424 ARMCC::CondCodes Pred, unsigned PredReg) {
425 assert(isThumb1 && "Can only update base register uses for Thumb1!");
426 // Start updating any instructions with immediate offsets. Insert a SUB before
427 // the first non-updateable instruction (if any).
428 for (; MBBI != MBB.end(); ++MBBI) {
429 bool InsertSub = false;
430 unsigned Opc = MBBI->getOpcode();
431
432 if (MBBI->readsRegister(Base)) {
433 int Offset;
434 bool IsLoad =
435 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
436 bool IsStore =
437 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
438
439 if (IsLoad || IsStore) {
440 // Loads and stores with immediate offsets can be updated, but only if
441 // the new offset isn't negative.
442 // The MachineOperand containing the offset immediate is the last one
443 // before predicates.
444 MachineOperand &MO =
445 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
446 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
447 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
448
449 // If storing the base register, it needs to be reset first.
450 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
451
452 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
453 MO.setImm(Offset);
454 else
455 InsertSub = true;
456
457 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
458 !definesCPSR(MBBI)) {
459 // SUBS/ADDS using this register, with a dead def of the CPSR.
460 // Merge it with the update; if the merged offset is too large,
461 // insert a new sub instead.
462 MachineOperand &MO =
463 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
464 Offset = (Opc == ARM::tSUBi8) ?
465 MO.getImm() + WordOffset * 4 :
466 MO.getImm() - WordOffset * 4 ;
467 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
468 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
469 // Offset == 0.
470 MO.setImm(Offset);
471 // The base register has now been reset, so exit early.
472 return;
473 } else {
474 InsertSub = true;
475 }
476
477 } else {
478 // Can't update the instruction.
479 InsertSub = true;
480 }
481
482 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
483 // Since SUBS sets the condition flags, we can't place the base reset
484 // after an instruction that has a live CPSR def.
485 // The base register might also contain an argument for a function call.
486 InsertSub = true;
487 }
488
489 if (InsertSub) {
490 // An instruction above couldn't be updated, so insert a sub.
491 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
492 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
493 return;
494 }
495
496 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
497 // Register got killed. Stop updating.
498 return;
499 }
500
501 // End of block was reached.
502 if (MBB.succ_size() > 0) {
503 // FIXME: Because of a bug, live registers are sometimes missing from
504 // the successor blocks' live-in sets. This means we can't trust that
505 // information and *always* have to reset at the end of a block.
506 // See PR21029.
507 if (MBBI != MBB.end()) --MBBI;
508 AddDefaultT1CC(
509 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
510 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
511 }
512 }
513
514 /// Return the first register of class \p RegClass that is not in \p Regs.
515 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
516 if (!RegClassInfoValid) {
517 RegClassInfo.runOnMachineFunction(*MF);
518 RegClassInfoValid = true;
519 }
520
521 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
522 if (!LiveRegs.contains(Reg))
523 return Reg;
524 return 0;
525 }
526
527 /// Compute live registers just before instruction \p Before (in normal schedule
528 /// direction). Computes backwards so multiple queries in the same block must
529 /// come in reverse order.
530 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
531 MachineBasicBlock::const_iterator Before) {
532 // Initialize if we never queried in this block.
533 if (!LiveRegsValid) {
534 LiveRegs.init(TRI);
535 LiveRegs.addLiveOuts(&MBB, true);
536 LiveRegPos = MBB.end();
537 LiveRegsValid = true;
538 }
539 // Move backward just before the "Before" position.
540 while (LiveRegPos != Before) {
541 --LiveRegPos;
542 LiveRegs.stepBackward(*LiveRegPos);
543 }
544 }
545
546 static bool ContainsReg(const ArrayRef> &Regs,
547 unsigned Reg) {
548 for (const std::pair &R : Regs)
549 if (R.first == Reg)
550 return true;
551 return false;
552 }
553
554 /// Create and insert a LDM or STM with Base as base register and registers in
555 /// Regs as the register operands that would be loaded / stored. Returns the
556 /// newly created instruction, or nullptr if no legal/profitable merge exists.
/// \param InsertBefore  Position at which the merged instruction is created.
/// \param Offset        Offset of the first memory operation relative to Base;
///                      used to pick the addressing submode (ia/ib/da/db) or
///                      to decide that a new base register must be built.
/// \param Regs          (register, kill-flag) pairs in ascending register
///                      number order, as collected by FormCandidates.
557 MachineInstr *
558 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
559 MachineBasicBlock::iterator InsertBefore, int Offset,
560 unsigned Base, bool BaseKill, unsigned Opcode,
561 ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
// NOTE(review): template arguments were stripped during extraction; this
// parameter should read ArrayRef<std::pair<unsigned, bool>> Regs — confirm
// against the upstream source.
562 ArrayRef> Regs) {
563 unsigned NumRegs = Regs.size();
// Callers only attempt a merge for two or more memory operations.
564 assert(NumRegs > 1);
565
566 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
567 // Compute liveness information for that register to make the decision.
// (20 is the instruction neighborhood computeRegisterLiveness inspects
// before giving up.)
568 bool SafeToClobberCPSR = !isThumb1 ||
569 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
570 MachineBasicBlock::LQR_Dead);
571
572 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
573
574 // Exception: If the base register is in the input reglist, Thumb1 LDM is
575 // non-writeback.
576 // It's also not possible to merge an STR of the base register in Thumb1.
// NOTE(review): the tSTRi arm below looks unreachable — the enclosing
// condition requires isi32Load(Opcode), which is false for a store. Verify.
577 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
578 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
579 if (Opcode == ARM::tLDRi) {
580 Writeback = false;
581 } else if (Opcode == ARM::tSTRi) {
582 return nullptr;
583 }
584 }
585
586 ARM_AM::AMSubMode Mode = ARM_AM::ia;
587 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
588 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
589 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
590
591 if (Offset == 4 && haveIBAndDA) {
592 Mode = ARM_AM::ib;
593 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
594 Mode = ARM_AM::da;
595 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
596 // VLDM/VSTM do not support DB mode without also updating the base reg.
597 Mode = ARM_AM::db;
598 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
599 // Check if this is a supported opcode before inserting instructions to
600 // calculate a new base register.
601 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
602
603 // If starting offset isn't zero, insert a MI to materialize a new base.
604 // But only do so if it is cost effective, i.e. merging more than two
605 // loads / stores.
606 if (NumRegs <= 2)
607 return nullptr;
608
609 // On Thumb1, it's not worth materializing a new base register without
610 // clobbering the CPSR (i.e. not using ADDS/SUBS).
611 if (!SafeToClobberCPSR)
612 return nullptr;
613
614 unsigned NewBase;
615 if (isi32Load(Opcode)) {
616 // If it is a load, then just use one of the destination register to
617 // use as the new base.
618 NewBase = Regs[NumRegs-1].first;
619 } else {
620 // Find a free register that we can use as scratch register.
621 moveLiveRegsBefore(MBB, InsertBefore);
622 // The merged instruction does not exist yet but will use several Regs if
623 // it is a Store.
624 if (!isLoadSingle(Opcode))
// NOTE(review): element type lost in extraction; should read
// const std::pair<unsigned, bool> &R.
625 for (const std::pair &R : Regs)
626 LiveRegs.addReg(R.first);
627
628 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
629 if (NewBase == 0)
630 return nullptr;
631 }
632
// Pick the add/sub opcode used to materialize Base+Offset into NewBase.
633 int BaseOpc =
634 isThumb2 ? ARM::t2ADDri :
635 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
636 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
637 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
638
639 if (Offset < 0) {
640 Offset = - Offset;
641 BaseOpc =
642 isThumb2 ? ARM::t2SUBri :
643 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
644 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
645 }
646
647 if (!TL->isLegalAddImmediate(Offset))
648 // FIXME: Try add with register operand?
649 return nullptr; // Probably not worth it then.
650
651 // We can only append a kill flag to the add/sub input if the value is not
652 // used in the register list of the stm as well.
653 bool KillOldBase = BaseKill &&
654 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
655
656 if (isThumb1) {
657 // Thumb1: depending on immediate size, use either
658 // ADDS NewBase, Base, #imm3
659 // or
660 // MOV NewBase, Base
661 // ADDS NewBase, #imm8.
662 if (Base != NewBase &&
663 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
664 // Need to insert a MOV to the new base first.
665 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
666 !STI->hasV6Ops()) {
667 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
668 if (Pred != ARMCC::AL)
669 return nullptr;
670 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
671 .addReg(Base, getKillRegState(KillOldBase));
672 } else
673 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
674 .addReg(Base, getKillRegState(KillOldBase))
675 .addImm(Pred).addReg(PredReg);
676
677 // The following ADDS/SUBS becomes an update.
678 Base = NewBase;
679 KillOldBase = true;
680 }
681 if (BaseOpc == ARM::tADDrSPi) {
682 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
683 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
684 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
685 .addImm(Pred).addReg(PredReg);
686 } else
687 AddDefaultT1CC(
688 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
689 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
690 .addImm(Pred).addReg(PredReg);
691 } else {
692 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
693 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
694 .addImm(Pred).addReg(PredReg).addReg(0);
695 }
696 Base = NewBase;
697 BaseKill = true; // New base is always killed straight away.
698 }
699
700 bool isDef = isLoadSingle(Opcode);
701
702 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
703 // base register writeback.
704 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
705 if (!Opcode)
706 return nullptr;
707
708 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
709 // - There is no writeback (LDM of base register),
710 // - the base register is killed by the merged instruction,
711 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
712 // to reset the base register.
713 // Otherwise, don't merge.
714 // It's safe to return here since the code to materialize a new base register
715 // above is also conditional on SafeToClobberCPSR.
716 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
717 return nullptr;
718
719 MachineInstrBuilder MIB;
720
721 if (Writeback) {
722 if (Opcode == ARM::tLDMIA)
723 // Update tLDMIA with writeback if necessary.
724 Opcode = ARM::tLDMIA_UPD;
725
726 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
727
728 // Thumb1: we might need to set base writeback when building the MI.
729 MIB.addReg(Base, getDefRegState(true))
730 .addReg(Base, getKillRegState(BaseKill));
731
732 // The base isn't dead after a merged instruction with writeback.
733 // Insert a sub instruction after the newly formed instruction to reset.
734 if (!BaseKill)
735 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
736
737 } else {
738 // No writeback, simply build the MachineInstr.
739 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
740 MIB.addReg(Base, getKillRegState(BaseKill));
741 }
742
743 MIB.addImm(Pred).addReg(PredReg);
744
// Append the register-list operands: defs for loads, uses for stores.
// NOTE(review): element type lost in extraction; should read
// const std::pair<unsigned, bool> &R.
745 for (const std::pair &R : Regs)
746 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
747
748 return MIB.getInstr();
749 }
750
751 /// Call MergeOps and update MemOps and merges accordingly on success.
/// Merges the instructions of \p Cand into a single LDM/STM inserted after the
/// latest instruction of the candidate, erases the originals, and fixes up
/// kill flags and implicit super-register defs in the affected range.
/// Returns the merged instruction, or nullptr if MergeOps declined.
752 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
753 const MachineInstr *First = Cand.Instrs.front();
754 unsigned Opcode = First->getOpcode();
755 bool IsLoad = isLoadSingle(Opcode);
// NOTE(review): template arguments on the next three declarations were lost
// in extraction; expected: SmallVector<std::pair<unsigned, bool>, 8>,
// SmallVector<unsigned, 4>, DenseSet<unsigned> — confirm against upstream.
756 SmallVector, 8> Regs;
757 SmallVector ImpDefs;
758 DenseSet KilledRegs;
759 // Determine list of registers and list of implicit super-register defs.
760 for (const MachineInstr *MI : Cand.Instrs) {
761 const MachineOperand &MO = getLoadStoreRegOp(*MI);
762 unsigned Reg = MO.getReg();
763 bool IsKill = MO.isKill();
764 if (IsKill)
765 KilledRegs.insert(Reg);
766 Regs.push_back(std::make_pair(Reg, IsKill));
767
768 if (IsLoad) {
769 // Collect any implicit defs of super-registers, after merging we can't
770 // be sure anymore that we properly preserved these live ranges and must
771 // remove these implicit operands.
772 for (const MachineOperand &MO : MI->implicit_operands()) {
773 if (!MO.isReg() || !MO.isDef() || MO.isDead())
774 continue;
775 assert(MO.isImplicit());
776 unsigned DefReg = MO.getReg();
777
778 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
779 continue;
780 // We can ignore cases where the super-reg is read and written.
781 if (MI->readsRegister(DefReg))
782 continue;
783 ImpDefs.push_back(DefReg);
784 }
785 }
786 }
787
788 // Attempt the merge.
789 typedef MachineBasicBlock::iterator iterator;
790 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
// The merged instruction is placed directly after the latest (lowest in the
// schedule) instruction of the candidate.
791 iterator InsertBefore = std::next(iterator(LatestMI));
792 MachineBasicBlock &MBB = *LatestMI->getParent();
// NOTE(review): getMemoryOpOffset presumably yields a signed offset; storing
// it in 'unsigned' relies on the implicit conversion back to int in the
// MergeOps call below — confirm this is intentional.
793 unsigned Offset = getMemoryOpOffset(First);
794 unsigned Base = getLoadStoreBaseOp(*First).getReg();
795 bool BaseKill = LatestMI->killsRegister(Base);
796 unsigned PredReg = 0;
797 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
798 DebugLoc DL = First->getDebugLoc();
799 MachineInstr *Merged = MergeOps(MBB, InsertBefore, Offset, Base, BaseKill,
800 Opcode, Pred, PredReg, DL, Regs);
801 if (!Merged)
802 return nullptr;
803
804 // Determine earliest instruction that will get removed. We then keep an
805 // iterator just above it so the following erases don't invalidate it.
806 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
807 bool EarliestAtBegin = false;
808 if (EarliestI == MBB.begin()) {
809 EarliestAtBegin = true;
810 } else {
811 EarliestI = std::prev(EarliestI);
812 }
813
814 // Remove instructions which have been merged.
815 for (MachineInstr *MI : Cand.Instrs)
816 MBB.erase(MI);
817
818 // Determine range between the earliest removed instruction and the new one.
819 if (EarliestAtBegin)
820 EarliestI = MBB.begin();
821 else
822 EarliestI = std::next(EarliestI);
823 auto FixupRange = make_range(EarliestI, iterator(Merged));
824
825 if (isLoadSingle(Opcode)) {
826 // If the previous loads defined a super-reg, then we have to mark earlier
827 // operands undef; Replicate the super-reg def on the merged instruction.
828 for (MachineInstr &MI : FixupRange) {
829 for (unsigned &ImpDefReg : ImpDefs) {
830 for (MachineOperand &MO : MI.implicit_operands()) {
831 if (!MO.isReg() || MO.getReg() != ImpDefReg)
832 continue;
833 if (MO.readsReg())
834 MO.setIsUndef();
835 else if (MO.isDef())
836 ImpDefReg = 0;
837 }
838 }
839 }
840
// NOTE(review): entries zeroed above (ImpDefReg = 0) still reach the addReg
// call below; confirm addReg(0) yields a harmless no-register operand.
841 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
842 for (unsigned ImpDef : ImpDefs)
843 MIB.addReg(ImpDef, RegState::ImplicitDefine);
844 } else {
845 // Remove kill flags: We are possibly storing the values later now.
846 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
847 for (MachineInstr &MI : FixupRange) {
848 for (MachineOperand &MO : MI.uses()) {
849 if (!MO.isReg() || !MO.isKill())
850 continue;
851 if (KilledRegs.count(MO.getReg()))
852 MO.setIsKill(false);
853 }
854 }
// Stores never populate ImpDefs (only the IsLoad path above does).
855 assert(ImpDefs.empty());
856 }
857
858 return Merged;
859 }
860
861 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
/// Partitions the (offset-sorted) \p MemOps queue into maximal runs that
/// satisfy the LDM/STM constraints and appends one MergeCandidate per run to
/// the Candidates list. All entries share the same opcode/base/predicate
/// (guaranteed by the caller).
862 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
863 const MachineInstr *FirstMI = MemOps[0].MI;
864 unsigned Opcode = FirstMI->getOpcode();
865 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
866 unsigned Size = getLSMultipleTransferSize(FirstMI);
867 // vldm / vstm limit are 32 for S variants, 16 for D variants.
// NOTE(review): Limit is computed but not consulted in the merge loop below;
// verify the VLDM/VSTM register-count cap is enforced elsewhere (or that
// lines were lost in extraction).
868 unsigned Limit;
869 switch (Opcode) {
870 default:
871 Limit = UINT_MAX;
872 break;
873 case ARM::VSTRS:
874 Limit = 32;
875 break;
876 case ARM::VSTRD:
877 Limit = 16;
878 break;
879 case ARM::VLDRD:
880 Limit = 16;
881 break;
882 case ARM::VLDRS:
883 Limit = 32;
884 break;
885 }
886
887 unsigned SIndex = 0;
888 unsigned EIndex = MemOps.size();
// Each do-while iteration consumes one maximal mergeable run starting at
// SIndex and emits one candidate for it.
889 do {
890 // Look at the first instruction.
891 const MachineInstr *MI = MemOps[SIndex].MI;
892 int Offset = MemOps[SIndex].Offset;
893 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
894 unsigned PReg = PMO.getReg();
// Undef operands sort last (UINT_MAX) so nothing can follow them in the
// ascending-register-number check below.
895 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
896 unsigned Latest = SIndex;
897 unsigned Earliest = SIndex;
898 unsigned Count = 1;
899
900 // Merge additional instructions fulfilling LDM/STM constraints.
901 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
// Offsets must be consecutive (each exactly Size bytes after the last).
902 int NewOffset = MemOps[I].Offset;
903 if (NewOffset != Offset + (int)Size)
904 break;
905 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
906 unsigned Reg = MO.getReg();
907 if (Reg == ARM::SP)
908 break;
909 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
910 // Register numbers must be in ascending order.
911 if (RegNum <= PRegNum)
912 break;
913 // For VFP / NEON load/store multiples, the registers must be consecutive
914 // and within the limit on the number of registers per instruction.
915 if (!isNotVFP && RegNum != PRegNum+1)
916 break;
917 // On Swift we don't want vldm/vstm to start with a odd register num
918 // because Q register unaligned vldm/vstm need more uops.
919 if (!isNotVFP && STI->isSwift() && Count == 1 && (PRegNum % 2) == 1)
920 break;
921
922 // Track MemOp with latest and earliest position (Positions are
923 // counted in reverse).
924 unsigned Position = MemOps[I].Position;
925 if (Position < MemOps[Latest].Position)
926 Latest = I;
927 else if (Position > MemOps[Earliest].Position)
928 Earliest = I;
929 // Prepare for next MemOp.
930 Offset += Size;
931 PRegNum = RegNum;
932 }
933
934 // Form a candidate from the Ops collected so far.
935 MergeCandidate *Candidate = new(Allocator) MergeCandidate;
936 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
937 Candidate->Instrs.push_back(MemOps[C].MI);
// Indices below are relative to the candidate's own Instrs vector.
938 Candidate->LatestMIIdx = Latest - SIndex;
939 Candidate->EarliestMIIdx = Earliest - SIndex;
940 Candidate->InsertPos = MemOps[Latest].Position;
941 Candidates.push_back(Candidate);
942 // Continue after the chain.
943 SIndex += Count;
944 } while (SIndex < EIndex);
945 }
946
947 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
948 unsigned Bytes, unsigned Limit,
949 ARMCC::CondCodes Pred, unsigned PredReg) {
950 unsigned MyPredReg = 0;
951 if (!MI)
952 return false;
953
954 bool CheckCPSRDef = false;
955 switch (MI->getOpcode()) {
956 default: return false;
957 case ARM::tSUBi8:
958 case ARM::t2SUBri:
959 case ARM::SUBri:
960 CheckCPSRDef = true;
961 break;
962 case ARM::tSUBspi:
963 break;
964 }
965
966 // Make sure the offset fits in 8 bits.
967 if (Bytes == 0 || (Limit && Bytes >= Limit))
968 return false;
969
970 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
971 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
972 if (!(MI->getOperand(0).getReg() == Base &&
973 MI->getOperand(1).getReg() == Base &&
974 (MI->getOperand(2).getImm() * Scale) == Bytes &&
975 getInstrPredicate(MI, MyPredReg) == Pred &&
976 MyPredReg == PredReg))
977 return false;
978
979 return CheckCPSRDef ? !definesCPSR(MI) : true;
980 }
981
982 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
983 unsigned Bytes, unsigned Limit,
984 ARMCC::CondCodes Pred, unsigned PredReg) {
985 unsigned MyPredReg = 0;
986 if (!MI)
987 return false;
988
989 bool CheckCPSRDef = false;
990 switch (MI->getOpcode()) {
991 default: return false;
992 case ARM::tADDi8:
993 case ARM::t2ADDri:
994 case ARM::ADDri:
995 CheckCPSRDef = true;
996 break;
997 case ARM::tADDspi:
998 break;
999 }
1000
1001 if (Bytes == 0 || (Limit && Bytes >= Limit))
1002 // Make sure the offset fits in 8 bits.
1003 return false;
1004
1005 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
1006 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
1007 if (!(MI->getOperand(0).getReg() == Base &&
1008 MI->getOperand(1).getReg() == Base &&
1009 (MI->getOperand(2).getImm() * Scale) == Bytes &&
1010 getInstrPredicate(MI, MyPredReg) == Pred &&
1011 MyPredReg == PredReg))
1012 return false;
1013
1014 return CheckCPSRDef ? !definesCPSR(MI) : true;
1015 }
1016
10161017 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
10171018 ARM_AM::AMSubMode Mode) {
10181019 switch (Opc) {
10921093 /// ldmia rn,
10931094 /// =>
10941095 /// ldmdb rn!,
1095 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
1096 MachineBasicBlock::iterator MBBI,
1097 bool &Advance,
1098 MachineBasicBlock::iterator &I) {
1096 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
10991097 // Thumb1 is already using updating loads/stores.
11001098 if (isThumb1) return false;
11011099
1102 MachineInstr *MI = MBBI;
1103 unsigned Base = MI->getOperand(0).getReg();
1104 bool BaseKill = MI->getOperand(0).isKill();
1100 const MachineOperand &BaseOP = MI->getOperand(0);
1101 unsigned Base = BaseOP.getReg();
1102 bool BaseKill = BaseOP.isKill();
11051103 unsigned Bytes = getLSMultipleTransferSize(MI);
11061104 unsigned PredReg = 0;
11071105 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
11081106 unsigned Opcode = MI->getOpcode();
1109 DebugLoc dl = MI->getDebugLoc();
1107 DebugLoc DL = MI->getDebugLoc();
11101108
11111109 // Can't use an updating ld/st if the base register is also a dest
11121110 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
11181116 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
11191117
11201118 // Try merging with the previous instruction.
1119 MachineBasicBlock &MBB = *MI->getParent();
11211120 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1121 MachineBasicBlock::iterator MBBI(MI);
11221122 if (MBBI != BeginMBBI) {
11231123 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
11241124 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
11491149 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
11501150 DoMerge = true;
11511151 }
1152 if (DoMerge) {
1153 if (NextMBBI == I) {
1154 Advance = true;
1155 ++I;
1156 }
1152 if (DoMerge)
11571153 MBB.erase(NextMBBI);
1158 }
11591154 }
11601155
11611156 if (!DoMerge)
11621157 return false;
11631158
11641159 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1165 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1160 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
11661161 .addReg(Base, getDefRegState(true)) // WB base register
11671162 .addReg(Base, getKillRegState(BaseKill))
11681163 .addImm(Pred).addReg(PredReg);
12301225
12311226 /// Fold proceeding/trailing inc/dec of base register into the
12321227 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1233 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1234 MachineBasicBlock::iterator MBBI,
1235 const TargetInstrInfo *TII,
1236 bool &Advance,
1237 MachineBasicBlock::iterator &I) {
1228 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
12381229 // Thumb1 doesn't have updating LDR/STR.
12391230 // FIXME: Use LDM/STM with single register instead.
12401231 if (isThumb1) return false;
12411232
1242 MachineInstr *MI = MBBI;
1243 unsigned Base = MI->getOperand(1).getReg();
1244 bool BaseKill = MI->getOperand(1).isKill();
1233 unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1234 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
12451235 unsigned Bytes = getLSMultipleTransferSize(MI);
12461236 unsigned Opcode = MI->getOpcode();
1247 DebugLoc dl = MI->getDebugLoc();
1237 DebugLoc DL = MI->getDebugLoc();
12481238 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
12491239 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
12501240 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
12541244 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
12551245 return false;
12561246
1257 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1247 bool isLd = isLoadSingle(Opcode);
12581248 // Can't do the merge if the destination register is the same as the would-be
12591249 // writeback register.
12601250 if (MI->getOperand(0).getReg() == Base)
12691259 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
12701260
12711261 // Try merging with the previous instruction.
1262 MachineBasicBlock &MBB = *MI->getParent();
12721263 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1264 MachineBasicBlock::iterator MBBI(MI);
12731265 if (MBBI != BeginMBBI) {
12741266 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
12751267 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
13021294 }
13031295 if (DoMerge) {
13041296 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1305 if (NextMBBI == I) {
1306 Advance = true;
1307 ++I;
1308 }
13091297 MBB.erase(NextMBBI);
13101298 }
13111299 }
13191307 // updating load/store-multiple instructions can be used with only one
13201308 // register.)
13211309 MachineOperand &MO = MI->getOperand(0);
1322 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1310 BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
13231311 .addReg(Base, getDefRegState(true)) // WB base register
13241312 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
13251313 .addImm(Pred).addReg(PredReg)
13301318 // LDR_PRE, LDR_POST
13311319 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
13321320 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1333 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1321 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
13341322 .addReg(Base, RegState::Define)
13351323 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
13361324 } else {
13371325 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1338 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1326 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
13391327 .addReg(Base, RegState::Define)
13401328 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
13411329 }
13421330 } else {
13431331 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
13441332 // t2LDR_PRE, t2LDR_POST
1345 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1333 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
13461334 .addReg(Base, RegState::Define)
13471335 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
13481336 }
13541342 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
13551343 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
13561344 // STR_PRE, STR_POST
1357 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1345 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
13581346 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
13591347 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
13601348 } else {
13611349 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
13621350 // t2STR_PRE, t2STR_POST
1363 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1351 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
13641352 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
13651353 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
13661354 }
14251413 return false;
14261414 }
14271415
1428 /// Advance register scavenger to just before the earliest memory op that is
1429 /// being merged.
1430 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1431 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1432 unsigned Position = MemOps[0].Position;
1433 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1434 if (MemOps[i].Position < Position) {
1435 Position = MemOps[i].Position;
1436 Loc = MemOps[i].MBBI;
1437 }
1438 }
1439
1440 if (Loc != MBB.begin())
1441 RS->forward(std::prev(Loc));
1442 }
1443
14441416 static void InsertLDR_STR(MachineBasicBlock &MBB,
14451417 MachineBasicBlock::iterator &MBBI,
14461418 int Offset, bool isDef,
1447 DebugLoc dl, unsigned NewOpc,
1419 DebugLoc DL, unsigned NewOpc,
14481420 unsigned Reg, bool RegDeadKill, bool RegUndef,
14491421 unsigned BaseReg, bool BaseKill, bool BaseUndef,
14501422 bool OffKill, bool OffUndef,
14901462 if (!Errata602117 && !NonConsecutiveRegs)
14911463 return false;
14921464
1493 MachineBasicBlock::iterator NewBBI = MBBI;
14941465 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
14951466 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
14961467 bool EvenDeadKill = isLd ?
15301501 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
15311502 ++NumSTRD2STM;
15321503 }
1533 NewBBI = std::prev(MBBI);
15341504 } else {
15351505 // Split into two instructions.
15361506 unsigned NewOpc = (isLd)
15521522 OddReg, OddDeadKill, false,
15531523 BaseReg, false, BaseUndef, false, OffUndef,
15541524 Pred, PredReg, TII, isT2);
1555 NewBBI = std::prev(MBBI);
15561525 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
15571526 EvenReg, EvenDeadKill, false,
15581527 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
15721541 EvenReg, EvenDeadKill, EvenUndef,
15731542 BaseReg, false, BaseUndef, false, OffUndef,
15741543 Pred, PredReg, TII, isT2);
1575 NewBBI = std::prev(MBBI);
15761544 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
15771545 OddReg, OddDeadKill, OddUndef,
15781546 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
15841552 ++NumSTRD2STR;
15851553 }
15861554
1587 MBB.erase(MI);
1588 MBBI = NewBBI;
1555 MBBI = MBB.erase(MBBI);
15891556 return true;
15901557 }
15911558
15921559 /// An optimization pass to turn multiple LDR / STR ops of the same base and
15931560 /// incrementing offset into LDM / STM ops.
15941561 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1595 unsigned NumMerges = 0;
1596 unsigned NumMemOps = 0;
15971562 MemOpQueue MemOps;
15981563 unsigned CurrBase = 0;
15991564 unsigned CurrOpc = ~0u;
16011566 ARMCC::CondCodes CurrPred = ARMCC::AL;
16021567 unsigned CurrPredReg = 0;
16031568 unsigned Position = 0;
1604 SmallVector Merges;
1605
1606 RS->enterBasicBlock(&MBB);
1607 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1608 while (MBBI != E) {
1569 assert(Candidates.size() == 0);
1570 LiveRegsValid = false;
1571
1572 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1573 I = MBBI) {
1574 // The instruction in front of the iterator is the one we look at.
1575 MBBI = std::prev(I);
16091576 if (FixInvalidRegPairOp(MBB, MBBI))
16101577 continue;
1611
1612 bool Advance = false;
1613 bool TryMerge = false;
1614
1615 bool isMemOp = isMemoryOp(MBBI);
1616 if (isMemOp) {
1578 ++Position;
1579
1580 if (isMemoryOp(MBBI)) {
16171581 unsigned Opcode = MBBI->getOpcode();
16181582 unsigned Size = getLSMultipleTransferSize(MBBI);
16191583 const MachineOperand &MO = MBBI->getOperand(0);
16201584 unsigned Reg = MO.getReg();
1621 bool isKill = MO.isDef() ? false : MO.isKill();
1622 unsigned Base = MBBI->getOperand(1).getReg();
1585 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
16231586 unsigned PredReg = 0;
16241587 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
16251588 int Offset = getMemoryOpOffset(MBBI);
1626 // Watch out for:
1627 // r4 := ldr [r5]
1628 // r5 := ldr [r5, #4]
1629 // r6 := ldr [r5, #8]
1630 //
1631 // The second ldr has effectively broken the chain even though it
1632 // looks like the later ldr(s) use the same base register. Try to
1633 // merge the ldr's so far, including this one. But don't try to
1634 // combine the following ldr(s).
1635 bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
1636
1637 // Watch out for:
1638 // r4 := ldr [r0, #8]
1639 // r4 := ldr [r0, #4]
1640 //
1641 // The optimization may reorder the second ldr in front of the first
1642 // ldr, which violates write after write(WAW) dependence. The same as
1643 // str. Try to merge inst(s) already in MemOps.
1644 bool Overlap = false;
1645 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1646 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1647 Overlap = true;
1648 break;
1649 }
1650 }
1651
1652 if (CurrBase == 0 && !Clobber) {
1589 if (CurrBase == 0) {
16531590 // Start of a new chain.
16541591 CurrBase = Base;
16551592 CurrOpc = Opcode;
16561593 CurrSize = Size;
16571594 CurrPred = Pred;
16581595 CurrPredReg = PredReg;
1659 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1660 ++NumMemOps;
1661 Advance = true;
1662 } else if (!Overlap) {
1663 if (Clobber) {
1664 TryMerge = true;
1665 Advance = true;
1666 }
1667
1668 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1669 // No need to match PredReg.
1670 // Continue adding to the queue.
1671 if (Offset > MemOps.back().Offset) {
1672 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1673 Position, MBBI));
1674 ++NumMemOps;
1675 Advance = true;
1676 } else {
1677 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1678 I != E; ++I) {
1679 if (Offset < I->Offset) {
1680 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1681 Position, MBBI));
1682 ++NumMemOps;
1683 Advance = true;
1684 break;
1685 } else if (Offset == I->Offset) {
1686 // Collision! This can't be merged!
1596 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1597 continue;
1598 }
1599 // Note: No need to match PredReg in the next if.
1600 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1601 // Watch out for:
1602 // r4 := ldr [r0, #8]
1603 // r4 := ldr [r0, #4]
1604 // or
1605 // r0 := ldr [r0]
1606 // If a load overrides the base register or a register loaded by
1607 // another load in our chain, we cannot take this instruction.
1608 bool Overlap = false;
1609 if (isLoadSingle(Opcode)) {
1610 Overlap = (Base == Reg);
1611 if (!Overlap) {
1612 for (const MemOpQueueEntry &E : MemOps) {
1613 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1614 Overlap = true;
16871615 break;
16881616 }
16891617 }
16901618 }
16911619 }
1692 }
1693 }
1694
1695 if (MBBI->isDebugValue()) {
1696 ++MBBI;
1697 if (MBBI == E)
1698 // Reach the end of the block, try merging the memory instructions.
1699 TryMerge = true;
1700 } else if (Advance) {
1701 ++Position;
1702 ++MBBI;
1703 if (MBBI == E)
1704 // Reach the end of the block, try merging the memory instructions.
1705 TryMerge = true;
1706 } else {
1707 TryMerge = true;
1708 }
1709
1710 if (TryMerge) {
1711 if (NumMemOps > 1) {
1712 // Try to find a free register to use as a new base in case it's needed.
1713 // First advance to the instruction just before the start of the chain.
1714 AdvanceRS(MBB, MemOps);
1715
1716 // Find a scratch register.
1717 unsigned Scratch =
1718 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1719
1720 // Process the load / store instructions.
1721 RS->forward(std::prev(MBBI));
1722
1723 // Merge ops.
1724 Merges.clear();
1725 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1726 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1727
1728 // Try folding preceding/trailing base inc/dec into the generated
1729 // LDM/STM ops.
1730 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1731 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1732 ++NumMerges;
1733 NumMerges += Merges.size();
1734
1735 // Try folding preceding/trailing base inc/dec into those load/store
1736 // that were not merged to form LDM/STM ops.
1737 for (unsigned i = 0; i != NumMemOps; ++i)
1738 if (!MemOps[i].Merged)
1739 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1740 ++NumMerges;
1741
1742 // RS may be pointing to an instruction that's deleted.
1743 RS->skipTo(std::prev(MBBI));
1744 } else if (NumMemOps == 1) {
1745 // Try folding preceding/trailing base inc/dec into the single
1746 // load/store.
1747 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1748 ++NumMerges;
1749 RS->forward(std::prev(MBBI));
1620
1621 if (!Overlap) {
1622 // Check offset and sort memory operation into the current chain.
1623 if (Offset > MemOps.back().Offset) {
1624 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1625 continue;
1626 } else {
1627 MemOpQueue::iterator MI, ME;
1628 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1629 if (Offset < MI->Offset) {
1630 // Found a place to insert.
1631 break;
1632 }
1633 if (Offset == MI->Offset) {
1634 // Collision, abort.
1635 MI = ME;
1636 break;
1637 }
1638 }
1639 if (MI != MemOps.end()) {
1640 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1641 continue;
1642 }
1643 }
17501644 }
17511645 }
17521646
1647 // Don't advance the iterator; The op will start a new chain next.
1648 MBBI = I;
1649 --Position;
1650 // Fallthrough to look into existing chain.
1651 } else if (MBBI->isDebugValue())
1652 continue;
1653
1654 // If we are here then the chain is broken; Extract candidates for a merge.
1655 if (MemOps.size() > 0) {
1656 FormCandidates(MemOps);
1657 // Reset for the next chain.
17531658 CurrBase = 0;
17541659 CurrOpc = ~0u;
17551660 CurrSize = 0;
17561661 CurrPred = ARMCC::AL;
17571662 CurrPredReg = 0;
1758 if (NumMemOps) {
1759 MemOps.clear();
1760 NumMemOps = 0;
1663 MemOps.clear();
1664 }
1665 }
1666 if (MemOps.size() > 0)
1667 FormCandidates(MemOps);
1668
1669 // Sort candidates so they get processed from end to begin of the basic
1670 // block later; This is necessary for liveness calculation.
1671 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1672 return M0->InsertPos < M1->InsertPos;
1673 };
1674 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1675
1676 // Go through list of candidates and merge.
1677 bool Changed = false;
1678 for (const MergeCandidate *Candidate : Candidates) {
1679 if (Candidate->Instrs.size() > 1) {
1680 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1681 // Merge preceding/trailing base inc/dec into the merged op.
1682 if (Merged) {
1683 MergeBaseUpdateLSMultiple(Merged);
1684 Changed = true;
1685 } else {
1686 for (MachineInstr *MI : Candidate->Instrs) {
1687 if (MergeBaseUpdateLoadStore(MI))
1688 Changed = true;
1689 }
17611690 }
1762
1763 // If iterator hasn't been advanced and this is not a memory op, skip it.
1764 // It can't start a new chain anyway.
1765 if (!Advance && !isMemOp && MBBI != E) {
1766 ++Position;
1767 ++MBBI;
1768 }
1769 }
1770 }
1771 return NumMerges > 0;
1691 } else {
1692 assert(Candidate->Instrs.size() == 1);
1693 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1694 Changed = true;
1695 }
1696 }
1697 Candidates.clear();
1698
1699 return Changed;
17721700 }
17731701
17741702 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
18131741 }
18141742
18151743 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1744 MF = &Fn;
18161745 STI = &static_cast(Fn.getSubtarget());
18171746 TL = STI->getTargetLowering();
18181747 AFI = Fn.getInfo();
18191748 TII = STI->getInstrInfo();
18201749 TRI = STI->getRegisterInfo();
1821 RS = new RegScavenger();
1750 MRI = &Fn.getRegInfo();
1751 RegClassInfoValid = false;
18221752 isThumb2 = AFI->isThumb2Function();
18231753 isThumb1 = AFI->isThumbFunction() && !isThumb2;
18241754
18311761 Modified |= MergeReturnIntoLDM(MBB);
18321762 }
18331763
1834 delete RS;
1764 Allocator.Reset();
18351765 return Modified;
18361766 }
18371767
22182148 continue;
22192149
22202150 int Opc = MI->getOpcode();
2221 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
2151 bool isLd = isLoadSingle(Opc);
22222152 unsigned Base = MI->getOperand(1).getReg();
22232153 int Offset = getMemoryOpOffset(MI);
22242154
88 ) {
99 ;CHECK: sub sp, sp, #16
1010 ;CHECK: push {r11, lr}
11 ;CHECK: add r11, sp, #8
12 ;CHECK: stm r11, {r0, r1, r2, r3}
11 ;CHECK: add r12, sp, #8
12 ;CHECK: stm r12, {r0, r1, r2, r3}
1313 ;CHECK: add r0, sp, #12
1414 ;CHECK: bl useInt
1515 ;CHECK: pop {r11, lr}