llvm.org GIT mirror llvm / 7a06a3c
[PowerPC] Move TOC save to prologue when profitable

The indirect call sequence on PPC requires that the TOC base register be saved prior to the indirect call and restored after the call, since the indirect call may branch to a global entry point in another DSO which will update the TOC base. Over the last couple of years, we have improved this to:
- be able to hoist TOC saves from loops (with changes to MachineLICM)
- avoid multiple saves when one dominates the other[s]

However, it is still possible to have multiple TOC saves dynamically in the execution path if there is no dominance relationship between them. This patch moves the TOC save to the prologue when one of the TOC saves is in a block that post-dominates entry (i.e. it cannot be avoided) or if it is in a block that is hotter than entry.

Differential revision: https://reviews.llvm.org/D63803

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365232 91177308-0d34-0410-b5e6-96231b3b80d8

Nemanja Ivanovic 4 months ago
6 changed file(s) with 94 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
463463 bool UseEstimate,
464464 unsigned *NewMaxCallFrameSize) const {
465465 const MachineFrameInfo &MFI = MF.getFrameInfo();
466 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
466467
467468 // Get the number of bytes to allocate from the FrameInfo
468469 unsigned FrameSize =
480481 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
481482 !MFI.adjustsStack() && // No calls.
482483 !MustSaveLR(MF, LR) && // No need to save LR.
484 !FI->mustSaveTOC() && // No need to save TOC.
483485 !RegInfo->hasBasePointer(MF); // No special alignment.
484486
485487 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
807809 // Check if the link register (LR) must be saved.
808810 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
809811 bool MustSaveLR = FI->mustSaveLR();
812 bool MustSaveTOC = FI->mustSaveTOC();
810813 const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs();
811814 bool MustSaveCR = !MustSaveCRs.empty();
812815 // Do we have a frame pointer and/or base pointer for this function?
818821 unsigned BPReg = RegInfo->getBaseRegister(MF);
819822 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
820823 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
824 unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
821825 unsigned ScratchReg = 0;
822826 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
823827 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
10891093 .addReg(SPReg)
10901094 .addReg(ScratchReg);
10911095 HasSTUX = true;
1096 }
1097
1098 // Save the TOC register after the stack pointer update if a prologue TOC
1099 // save is required for the function.
1100 if (MustSaveTOC) {
1101 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1102 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1103 .addReg(TOCReg, getKillRegState(true))
1104 .addImm(TOCSaveOffset)
1105 .addReg(SPReg);
10921106 }
10931107
10941108 if (!HasRedZone) {
12921306 if (PPC::CRBITRCRegClass.contains(Reg))
12931307 continue;
12941308
1309 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1310 continue;
1311
12951312 // For SVR4, don't emit a move for the CR spill slot if we haven't
12961313 // spilled CRs.
12971314 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
18381855 unsigned MinFPR = PPC::F31;
18391856 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
18401857
1858 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
18411859 bool HasGPSaveArea = false;
18421860 bool HasG8SaveArea = false;
18431861 bool HasFPSaveArea = false;
18441862 bool HasVRSAVESaveArea = false;
18451863 bool HasVRSaveArea = false;
1864 bool MustSaveTOC = FI->mustSaveTOC();
18461865
18471866 SmallVector GPRegs;
18481867 SmallVector G8Regs;
18511870
18521871 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
18531872 unsigned Reg = CSI[i].getReg();
1873 assert((!MustSaveTOC || (Reg != PPC::X2 && Reg != PPC::R2)) &&
1874 "Not expecting to try to spill R2 in a function that must save TOC");
18541875 if (PPC::GPRCRegClass.contains(Reg) ||
18551876 PPC::SPE4RCRegClass.contains(Reg)) {
18561877 HasGPSaveArea = true;
21602181
21612182 MachineFunction *MF = MBB.getParent();
21622183 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2184 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2185 bool MustSaveTOC = FI->mustSaveTOC();
21632186 DebugLoc DL;
21642187 bool CRSpilled = false;
21652188 MachineInstrBuilder CRMIB;
21892212 CRMIB.addReg(Reg, RegState::ImplicitKill);
21902213 continue;
21912214 }
2215
2216 // The actual spill will happen in the prologue.
2217 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2218 continue;
21922219
21932220 // Insert the spill to the stack frame.
21942221 if (IsCRField) {
23172344
23182345 MachineFunction *MF = MBB.getParent();
23192346 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2347 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2348 bool MustSaveTOC = FI->mustSaveTOC();
23202349 bool CR2Spilled = false;
23212350 bool CR3Spilled = false;
23222351 bool CR4Spilled = false;
23372366 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
23382367 // Darwin, ignore it.
23392368 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2369 continue;
2370
2371 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
23402372 continue;
23412373
23422374 if (Reg == PPC::CR2) {
2020 #include "PPC.h"
2121 #include "PPCInstrBuilder.h"
2222 #include "PPCInstrInfo.h"
23 #include "PPCMachineFunctionInfo.h"
2324 #include "PPCTargetMachine.h"
2425 #include "llvm/ADT/Statistic.h"
26 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
2527 #include "llvm/CodeGen/MachineDominators.h"
28 #include "llvm/CodeGen/MachinePostDominators.h"
2629 #include "llvm/CodeGen/MachineFunctionPass.h"
2730 #include "llvm/CodeGen/MachineInstrBuilder.h"
2831 #include "llvm/CodeGen/MachineRegisterInfo.h"
3639 STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
3740 STATISTIC(MultiTOCSaves,
3841 "Number of functions with multiple TOC saves that must be kept");
42 STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
3943 STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
4044 STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
4145 STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
8387
8488 private:
8589 MachineDominatorTree *MDT;
90 MachinePostDominatorTree *MPDT;
91 MachineBlockFrequencyInfo *MBFI;
92 uint64_t EntryFreq;
8693
8794 // Initialize class variables.
8895 void initialize(MachineFunction &MFParm);
101108
102109 void getAnalysisUsage(AnalysisUsage &AU) const override {
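103110 AU.addRequired<MachineDominatorTree>();
111 AU.addRequired<MachinePostDominatorTree>();
112 AU.addRequired<MachineBlockFrequencyInfo>();
104113 AU.addPreserved<MachineDominatorTree>();
114 AU.addPreserved<MachinePostDominatorTree>();
115 AU.addPreserved<MachineBlockFrequencyInfo>();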
105116 MachineFunctionPass::getAnalysisUsage(AU);
106117 }
107118
119130 MF = &MFParm;
120131 MRI = &MF->getRegInfo();
121132 MDT = &getAnalysis<MachineDominatorTree>();
133 MPDT = &getAnalysis<MachinePostDominatorTree>();
134 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
135 EntryFreq = MBFI->getEntryFreq();
122136 TII = MF->getSubtarget().getInstrInfo();
123137 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
124138 LLVM_DEBUG(MF->dump());
199213 void PPCMIPeephole::UpdateTOCSaves(
200214 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
201215 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
216 assert(MF->getSubtarget().isELFv2ABI() &&
217 "TOC-save removal only supported on ELFv2");
218 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
219 MachineFrameInfo &MFI = MF->getFrameInfo();
220
221 MachineBasicBlock *Entry = &MF->front();
222 uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
223
224 // If the TOC save resides in a block that post-dominates Entry, or in a
225 // block that is hotter than Entry (keep in mind that early MachineLICM has
226 // already run, so the TOC save won't be hoisted), we can just do the save
227 // in the prologue.
228 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
229 FI->setMustSaveTOC(true);
230
231 // If we are saving the TOC in the prologue, all the TOC saves can be removed
232 // from the code.
233 if (FI->mustSaveTOC()) {
234 for (auto &TOCSave : TOCSaves)
235 TOCSave.second = false;
236 // Add new instruction to map.
237 TOCSaves[MI] = false;
238 return;
239 }
240
202241 bool Keep = true;
203242 for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
204243 MachineInstr *CurrInst = It->first;
776815
777816 // Eliminate all the TOC save instructions which are redundant.
778817 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
818 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
819 if (FI->mustSaveTOC())
820 NumTOCSavesInPrologue++;
821
779822 // We try to eliminate redundant compare instruction.
780823 Simplified |= eliminateRedundantCompare();
781824
13401383
13411384 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
13421385 "PowerPC MI Peephole Optimization", false, false)
1386 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
1387 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
1388 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
13431389 INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
13441390 "PowerPC MI Peephole Optimization", false, false)
13451391
4242 /// function. This is only valid after the initial scan of the function by
4343 /// PEI.
4444 bool MustSaveLR;
45
46 /// MustSaveTOC - Indicates that the TOC save needs to be performed in the
47 /// prologue of the function. This is typically the case when there are
48 /// indirect calls in the function and it is more profitable to save the
49 /// TOC pointer in the prologue than in the block(s) containing the call(s).
50 bool MustSaveTOC = false;
4551
4652 /// Do we have to disable shrink-wrapping? This has to be set if we emit any
4753 /// instructions that clobber LR in the entry block because discovering this
150156 void setMustSaveLR(bool U) { MustSaveLR = U; }
151157 bool mustSaveLR() const { return MustSaveLR; }
152158
159 void setMustSaveTOC(bool U) { MustSaveTOC = U; }
160 bool mustSaveTOC() const { return MustSaveTOC; }
161
153162 /// We certainly don't want to shrink wrap functions if we've emitted a
154163 /// MovePCtoLR8 as that has to go into the entry, so the prologue definitely
155164 /// has to go into the entry block.
2020 ; CHECK-NEXT: std 30, -16(1)
2121 ; CHECK-NEXT: std 0, 16(1)
2222 ; CHECK-NEXT: stdu 1, -48(1)
23 ; CHECK-NEXT: std 2, 24(1)
24 ; CHECK-NEXT: mr 30, 3
2325 ; CHECK-NEXT: ld 12, 0(3)
24 ; CHECK-NEXT: mr 30, 3
25 ; CHECK-NEXT: std 2, 24(1)
2626 ; CHECK-NEXT: mtctr 12
2727 ; CHECK-NEXT: bctrl
2828 ; CHECK-NEXT: ld 2, 24(1)
3535 ; Check for multiple TOC saves with if then else where neither dominates the other.
3636 define signext i32 @test3(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) {
3737 ; CHECK-LABEL: test3:
38 ; CHECK: std 2, 24(1)
3938 ; CHECK: std 2, 24(1)
4039 ; CHECK-NOT: std 2, 24(1)
4140 entry:
8685 entry:
8786 ; CHECK-LABEL: test5:
8887 ; CHECK: std 2, 24(1)
89 ; CHECK: std 2, 24(1)
9088
9189 %tobool = icmp eq i32 %i, 0
9290 br i1 %tobool, label %if.end, label %if.then
1818 ; CHECK-NEXT: cmpwi cr1, r4, 11
1919 ; CHECK-NEXT: mr r30, r3
2020 ; CHECK-NEXT: extsw r28, r4
21 ; CHECK-NEXT: std r2, 24(r1)
2122 ; CHECK-NEXT: cmpwi r29, 1
2223 ; CHECK-NEXT: cror 4*cr5+lt, lt, 4*cr1+lt
23 ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_3
24 ; CHECK-NEXT: # %bb.1: # %for.body.us.preheader
25 ; CHECK-NEXT: std r2, 24(r1)
24 ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_2
2625 ; CHECK-NEXT: .p2align 5
27 ; CHECK-NEXT: .LBB0_2: # %for.body.us
26 ; CHECK-NEXT: .LBB0_1: # %for.body.us
2827 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
2928 ; CHECK-NEXT: mtctr r30
3029 ; CHECK-NEXT: mr r3, r28
3332 ; CHECK-NEXT: ld 2, 24(r1)
3433 ; CHECK-NEXT: addi r29, r29, -1
3534 ; CHECK-NEXT: cmplwi r29, 0
36 ; CHECK-NEXT: bne cr0, .LBB0_2
37 ; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup
35 ; CHECK-NEXT: bne cr0, .LBB0_1
36 ; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup
3837 ; CHECK-NEXT: mtctr r30
3938 ; CHECK-NEXT: mr r3, r28
4039 ; CHECK-NEXT: mr r12, r30
41 ; CHECK-NEXT: std r2, 24(r1)
4240 ; CHECK-NEXT: bctrl
4341 ; CHECK-NEXT: ld 2, 24(r1)
4442 ; CHECK-NEXT: addi r1, r1, 64