llvm.org GIT mirror llvm / c4a91bf
RegUsageInfoCollector: Skip AMDGPU entry point functions I'm not sure if it's worth it or not to add a hook to disable the pass for an arbitrary function. This pass is taking up to 5% of compile time in tiny programs by iterating through all of the physical registers in every register class. This pass should be rewritten in terms of regunits. For now, skip doing anything for entry point functions. The vast majority of functions in the real world aren't callable, so just not running this will give the majority of the benefit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365255 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 4 months ago
4 changed file(s) with 51 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
7676 return new RegUsageInfoCollector();
7777 }
7878
79 // TODO: Move to hook somewhere?
80
81 // Return true if it is useful to track the used registers for IPRA / no CSR
82 // optimizations. This is not useful for entry points, and computing the
83 // register usage information is expensive.
84 static bool isCallableFunction(const MachineFunction &MF) {
85 switch (MF.getFunction().getCallingConv()) { // Decide purely from the IR function's calling convention.
86 case CallingConv::AMDGPU_VS:
87 case CallingConv::AMDGPU_GS:
88 case CallingConv::AMDGPU_PS:
89 case CallingConv::AMDGPU_CS:
90 case CallingConv::AMDGPU_KERNEL:
91 return false; // AMDGPU entry-point conventions listed above: register usage is not worth tracking.
92 default:
93 return true; // Any other calling convention is treated as callable.
94 }
95 }
96
7997 bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
8098 MachineRegisterInfo *MRI = &MF.getRegInfo();
8199 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
82100 const LLVMTargetMachine &TM = MF.getTarget();
83101
84102 LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
85 << " -------------------- \n");
86 LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
103 << " -------------------- \nFunction Name : "
104 << MF.getName() << '\n');
105
106 // Analyzing the register usage may be expensive on some targets.
107 if (!isCallableFunction(MF)) {
108 LLVM_DEBUG(dbgs() << "Not analyzing non-callable function\n");
109 return false;
110 }
111
112 // If there are no callers, there's no point in computing more precise
113 // register usage here.
114 if (MF.getFunction().use_empty()) {
115 LLVM_DEBUG(dbgs() << "Not analyzing function with no callers\n");
116 return false;
117 }
87118
88119 std::vector RegMask;
89120
109140 };
110141 // Scan all the physical registers. When a register is defined in the current
111142 // function set it and all the aliasing registers as defined in the regmask.
143 // FIXME: Rewrite to use regunits.
112144 for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
113145 // Don't count registers that are saved and restored.
114146 if (SavedRegs.test(PReg))
134166 << " function optimized for not having CSR.\n");
135167 }
136168
137 for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
138 if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
139 LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " ");
140
141 LLVM_DEBUG(dbgs() << " \n----------------------------------------\n");
169 LLVM_DEBUG(
170 for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
171 if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
172 dbgs() << printReg(PReg, TRI) << " ";
173 }
174
175 dbgs() << " \n----------------------------------------\n";
176 );
142177
143178 PRUI.storeUpdateRegUsageInfo(F, RegMask);
144179
164199 }
165200
166201 // Insert any register fully saved via subregisters.
202 // FIXME: Rewrite to use regunits.
167203 for (const TargetRegisterClass *RC : TRI.regclasses()) {
168204 if (!RC->CoveredBySubRegs)
169205 continue;
1010 call void @bar2()
1111 ret void
1212 }
13
14 @llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)]
15
1316 declare void @bar2()
1417 attributes #0 = {nounwind}
1616 ret void
1717 }
1818
19 @llvm.used = appending global [2 x i8*] [i8* bitcast (void ()* @foo to i8*), i8* bitcast (void ()* @bar to i8*)]
20
1921 attributes #0 = { nounwind }
88 ret void
99 }
1010 declare void @bar2()
11
12 @llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)]
13
1114 attributes #0 = {nounwind}