llvm.org GIT mirror llvm / 438669c
[AArch64] Add experimental PBQP support This adds target specific support for using the PBQP register allocator on AArch64, for the A57 CPU. By default, the PBQP allocator is not used, unless explicitly requested on the command line with "-aarch64-pbqp". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217504 91177308-0d34-0410-b5e6-96231b3b80d8 Arnaud A. de Grandmaison 6 years ago
6 changed file(s) with 449 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
3838 FunctionPass *createAArch64ConditionOptimizerPass();
3939 FunctionPass *createAArch64AddressTypePromotionPass();
4040 FunctionPass *createAArch64A57FPLoadBalancing();
41 FunctionPass *createAArch64A57PBQPRegAlloc();
4142 /// \brief Creates an AArch64-specific Target Transformation Info pass.
4243 ImmutablePass *
4344 createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);
0 //===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 // This file contains the AArch64 / Cortex-A57 specific register allocation
9 // constraints for use by the PBQP register allocator.
10 //
11 // It is essentially a transcription of what is contained in
12 // AArch64A57FPLoadBalancing, which tries to use a balanced
13 // mix of odd and even D-registers when performing a critical sequence of
14 // independent, non-quadword FP/ASIMD floating-point multiply-accumulates.
15 //===----------------------------------------------------------------------===//
16
17 #define DEBUG_TYPE "aarch64-pbqp"
18
19 #include "AArch64.h"
20 #include "AArch64RegisterInfo.h"
21
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegAllocPBQP.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/raw_ostream.h"
31
32 #define PBQP_BUILDER PBQPBuilderWithCoalescing
33 //#define PBQP_BUILDER PBQPBuilder
34
35 using namespace llvm;
36
37 namespace {
38
39 bool isFPReg(unsigned reg) {
40 return AArch64::FPR32RegClass.contains(reg) ||
41 AArch64::FPR64RegClass.contains(reg) ||
42 AArch64::FPR128RegClass.contains(reg);
43 };
44
45 bool isOdd(unsigned reg) {
46 switch (reg) {
47 default:
48 llvm_unreachable("Register is not from the expected class !");
49 case AArch64::S1:
50 case AArch64::S3:
51 case AArch64::S5:
52 case AArch64::S7:
53 case AArch64::S9:
54 case AArch64::S11:
55 case AArch64::S13:
56 case AArch64::S15:
57 case AArch64::S17:
58 case AArch64::S19:
59 case AArch64::S21:
60 case AArch64::S23:
61 case AArch64::S25:
62 case AArch64::S27:
63 case AArch64::S29:
64 case AArch64::S31:
65 case AArch64::D1:
66 case AArch64::D3:
67 case AArch64::D5:
68 case AArch64::D7:
69 case AArch64::D9:
70 case AArch64::D11:
71 case AArch64::D13:
72 case AArch64::D15:
73 case AArch64::D17:
74 case AArch64::D19:
75 case AArch64::D21:
76 case AArch64::D23:
77 case AArch64::D25:
78 case AArch64::D27:
79 case AArch64::D29:
80 case AArch64::D31:
81 case AArch64::Q1:
82 case AArch64::Q3:
83 case AArch64::Q5:
84 case AArch64::Q7:
85 case AArch64::Q9:
86 case AArch64::Q11:
87 case AArch64::Q13:
88 case AArch64::Q15:
89 case AArch64::Q17:
90 case AArch64::Q19:
91 case AArch64::Q21:
92 case AArch64::Q23:
93 case AArch64::Q25:
94 case AArch64::Q27:
95 case AArch64::Q29:
96 case AArch64::Q31:
97 return true;
98 case AArch64::S0:
99 case AArch64::S2:
100 case AArch64::S4:
101 case AArch64::S6:
102 case AArch64::S8:
103 case AArch64::S10:
104 case AArch64::S12:
105 case AArch64::S14:
106 case AArch64::S16:
107 case AArch64::S18:
108 case AArch64::S20:
109 case AArch64::S22:
110 case AArch64::S24:
111 case AArch64::S26:
112 case AArch64::S28:
113 case AArch64::S30:
114 case AArch64::D0:
115 case AArch64::D2:
116 case AArch64::D4:
117 case AArch64::D6:
118 case AArch64::D8:
119 case AArch64::D10:
120 case AArch64::D12:
121 case AArch64::D14:
122 case AArch64::D16:
123 case AArch64::D18:
124 case AArch64::D20:
125 case AArch64::D22:
126 case AArch64::D24:
127 case AArch64::D26:
128 case AArch64::D28:
129 case AArch64::D30:
130 case AArch64::Q0:
131 case AArch64::Q2:
132 case AArch64::Q4:
133 case AArch64::Q6:
134 case AArch64::Q8:
135 case AArch64::Q10:
136 case AArch64::Q12:
137 case AArch64::Q14:
138 case AArch64::Q16:
139 case AArch64::Q18:
140 case AArch64::Q20:
141 case AArch64::Q22:
142 case AArch64::Q24:
143 case AArch64::Q26:
144 case AArch64::Q28:
145 case AArch64::Q30:
146 return false;
147
148 }
149 }
150
151 bool haveSameParity(unsigned reg1, unsigned reg2) {
152 assert(isFPReg(reg1) && "Expecting an FP register for reg1");
153 assert(isFPReg(reg2) && "Expecting an FP register for reg2");
154
155 return isOdd(reg1) == isOdd(reg2);
156 }
157
158 class A57PBQPBuilder : public PBQP_BUILDER {
159 public:
160 A57PBQPBuilder() : PBQP_BUILDER(), TRI(nullptr), LIs(nullptr), Chains() {}
161
162 // Build a PBQP instance to represent the register allocation problem for
163 // the given MachineFunction.
164 std::unique_ptr
165 build(MachineFunction *MF, const LiveIntervals *LI,
166 const MachineBlockFrequencyInfo *blockInfo,
167 const RegSet &VRegs) override;
168
169 private:
170 const AArch64RegisterInfo *TRI;
171 const LiveIntervals *LIs;
172 SmallSetVector Chains;
173
174 // Return true if reg is a physical register
175 bool isPhysicalReg(unsigned reg) const {
176 return TRI->isPhysicalRegister(reg);
177 }
178
179 // Add the accumulator chaining constraint, inside the chain, i.e. so that
180 // parity(Rd) == parity(Ra).
181 // \return true if a constraint was added
182 bool addIntraChainConstraint(PBQPRAProblem *p, unsigned Rd, unsigned Ra);
183
184 // Add constraints between existing chains
185 void addInterChainConstraint(PBQPRAProblem *p, unsigned Rd, unsigned Ra);
186 };
187 } // Anonymous namespace
188
189 bool A57PBQPBuilder::addIntraChainConstraint(PBQPRAProblem *p, unsigned Rd,
190 unsigned Ra) {
191 if (Rd == Ra)
192 return false;
193
194 if (isPhysicalReg(Rd) || isPhysicalReg(Ra)) {
195 dbgs() << "Rd is a physical reg:" << isPhysicalReg(Rd) << '\n';
196 dbgs() << "Ra is a physical reg:" << isPhysicalReg(Ra) << '\n';
197 return false;
198 }
199
200 const PBQPRAProblem::AllowedSet *vRdAllowed = &p->getAllowedSet(Rd);
201 const PBQPRAProblem::AllowedSet *vRaAllowed = &p->getAllowedSet(Ra);
202
203 PBQPRAGraph &g = p->getGraph();
204 PBQPRAGraph::NodeId node1 = p->getNodeForVReg(Rd);
205 PBQPRAGraph::NodeId node2 = p->getNodeForVReg(Ra);
206 PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2);
207
208 // The edge does not exist. Create one with the appropriate interference
209 // costs.
210 if (edge == g.invalidEdgeId()) {
211 const LiveInterval &ld = LIs->getInterval(Rd);
212 const LiveInterval &la = LIs->getInterval(Ra);
213 bool livesOverlap = ld.overlaps(la);
214
215 PBQP::Matrix costs(vRdAllowed->size() + 1, vRaAllowed->size() + 1, 0);
216 for (unsigned i = 0; i != vRdAllowed->size(); ++i) {
217 unsigned pRd = (*vRdAllowed)[i];
218 for (unsigned j = 0; j != vRaAllowed->size(); ++j) {
219 unsigned pRa = (*vRaAllowed)[j];
220 if (livesOverlap && TRI->regsOverlap(pRd, pRa))
221 costs[i + 1][j + 1] = std::numeric_limits::infinity();
222 else
223 costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0;
224 }
225 }
226 g.addEdge(node1, node2, std::move(costs));
227 return true;
228 }
229
230 if (g.getEdgeNode1Id(edge) == node2) {
231 std::swap(node1, node2);
232 std::swap(vRdAllowed, vRaAllowed);
233 }
234
235 // Enforce minCost(sameParity(RaClass)) > maxCost(otherParity(RdClass))
236 PBQP::Matrix costs(g.getEdgeCosts(edge));
237 for (unsigned i = 0; i != vRdAllowed->size(); ++i) {
238 unsigned pRd = (*vRdAllowed)[i];
239
240 // Get the maximum cost (excluding unallocatable reg) for same parity
241 // registers
242 PBQP::PBQPNum sameParityMax = std::numeric_limits::min();
243 for (unsigned j = 0; j != vRaAllowed->size(); ++j) {
244 unsigned pRa = (*vRaAllowed)[j];
245 if (haveSameParity(pRd, pRa))
246 if (costs[i + 1][j + 1] !=
247 std::numeric_limits::infinity() &&
248 costs[i + 1][j + 1] > sameParityMax)
249 sameParityMax = costs[i + 1][j + 1];
250 }
251
252 // Ensure all registers with a different parity have a higher cost
253 // than sameParityMax
254 for (unsigned j = 0; j != vRaAllowed->size(); ++j) {
255 unsigned pRa = (*vRaAllowed)[j];
256 if (!haveSameParity(pRd, pRa))
257 if (sameParityMax > costs[i + 1][j + 1])
258 costs[i + 1][j + 1] = sameParityMax + 1.0;
259 }
260 }
261 g.setEdgeCosts(edge, costs);
262
263 return true;
264 }
265
266 void
267 A57PBQPBuilder::addInterChainConstraint(PBQPRAProblem *p, unsigned Rd,
268 unsigned Ra) {
269 // Do some Chain management
270 if (Chains.count(Ra)) {
271 if (Rd != Ra) {
272 DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to "
273 << PrintReg(Rd, TRI) << '\n';);
274 Chains.remove(Ra);
275 Chains.insert(Rd);
276 }
277 } else {
278 DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI)
279 << '\n';);
280 Chains.insert(Rd);
281 }
282
283 const LiveInterval &ld = LIs->getInterval(Rd);
284 for (auto r : Chains) {
285 // Skip self
286 if (r == Rd)
287 continue;
288
289 const LiveInterval &lr = LIs->getInterval(r);
290 if (ld.overlaps(lr)) {
291 const PBQPRAProblem::AllowedSet *vRdAllowed = &p->getAllowedSet(Rd);
292 const PBQPRAProblem::AllowedSet *vRrAllowed = &p->getAllowedSet(r);
293
294 PBQPRAGraph &g = p->getGraph();
295 PBQPRAGraph::NodeId node1 = p->getNodeForVReg(Rd);
296 PBQPRAGraph::NodeId node2 = p->getNodeForVReg(r);
297 PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2);
298 assert(edge != g.invalidEdgeId() &&
299 "PBQP error ! The edge should exist !");
300
301 DEBUG(dbgs() << "Refining constraint !\n";);
302
303 if (g.getEdgeNode1Id(edge) == node2) {
304 std::swap(node1, node2);
305 std::swap(vRdAllowed, vRrAllowed);
306 }
307
308 // Enforce that cost is higher with all other Chains of the same parity
309 PBQP::Matrix costs(g.getEdgeCosts(edge));
310 for (unsigned i = 0; i != vRdAllowed->size(); ++i) {
311 unsigned pRd = (*vRdAllowed)[i];
312
313 // Get the maximum cost (excluding unallocatable reg) for all other
314 // parity registers
315 PBQP::PBQPNum sameParityMax = std::numeric_limits::min();
316 for (unsigned j = 0; j != vRrAllowed->size(); ++j) {
317 unsigned pRa = (*vRrAllowed)[j];
318 if (!haveSameParity(pRd, pRa))
319 if (costs[i + 1][j + 1] !=
320 std::numeric_limits::infinity() &&
321 costs[i + 1][j + 1] > sameParityMax)
322 sameParityMax = costs[i + 1][j + 1];
323 }
324
325 // Ensure all registers with same parity have a higher cost
326 // than sameParityMax
327 for (unsigned j = 0; j != vRrAllowed->size(); ++j) {
328 unsigned pRa = (*vRrAllowed)[j];
329 if (haveSameParity(pRd, pRa))
330 if (sameParityMax > costs[i + 1][j + 1])
331 costs[i + 1][j + 1] = sameParityMax + 1.0;
332 }
333 }
334 g.setEdgeCosts(edge, costs);
335 }
336 }
337 }
338
339 std::unique_ptr
340 A57PBQPBuilder::build(MachineFunction *MF, const LiveIntervals *LI,
341 const MachineBlockFrequencyInfo *blockInfo,
342 const RegSet &VRegs) {
343 std::unique_ptr p =
344 PBQP_BUILDER::build(MF, LI, blockInfo, VRegs);
345
346 TRI = static_cast(
347 MF->getTarget().getSubtargetImpl()->getRegisterInfo());
348 LIs = LI;
349
350 DEBUG(MF->dump(););
351
352 for (MachineFunction::const_iterator mbbItr = MF->begin(), mbbEnd = MF->end();
353 mbbItr != mbbEnd; ++mbbItr) {
354 const MachineBasicBlock *MBB = &*mbbItr;
355 Chains.clear(); // FIXME: really needed ? Could not work at MF level ?
356
357 for (MachineBasicBlock::const_iterator miItr = MBB->begin(),
358 miEnd = MBB->end();
359 miItr != miEnd; ++miItr) {
360 const MachineInstr *MI = &*miItr;
361 switch (MI->getOpcode()) {
362 case AArch64::FMSUBSrrr:
363 case AArch64::FMADDSrrr:
364 case AArch64::FNMSUBSrrr:
365 case AArch64::FNMADDSrrr:
366 case AArch64::FMSUBDrrr:
367 case AArch64::FMADDDrrr:
368 case AArch64::FNMSUBDrrr:
369 case AArch64::FNMADDDrrr: {
370 unsigned Rd = MI->getOperand(0).getReg();
371 unsigned Ra = MI->getOperand(3).getReg();
372
373 if (addIntraChainConstraint(p.get(), Rd, Ra))
374 addInterChainConstraint(p.get(), Rd, Ra);
375 break;
376 }
377
378 case AArch64::FMLAv2f32:
379 case AArch64::FMLSv2f32: {
380 unsigned Rd = MI->getOperand(0).getReg();
381 addInterChainConstraint(p.get(), Rd, Rd);
382 break;
383 }
384
385 default:
386 // Forget Chains which have been killed
387 for (auto r : Chains) {
388 SmallVector toDel;
389 if (MI->killsRegister(r)) {
390 DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at ";
391 MI->print(dbgs()););
392 toDel.push_back(r);
393 }
394
395 while (!toDel.empty()) {
396 Chains.remove(toDel.back());
397 toDel.pop_back();
398 }
399 }
400 }
401 }
402 }
403
404 return p;
405 }
406
407 // Factory function used by AArch64TargetMachine to add the pass to the
408 // passmanager.
409 FunctionPass *llvm::createAArch64A57PBQPRegAlloc() {
410 std::unique_ptr builder = llvm::make_unique();
411 return createPBQPRegisterAllocator(std::move(builder), nullptr);
412 }
1212 #include "AArch64.h"
1313 #include "AArch64TargetMachine.h"
1414 #include "llvm/CodeGen/Passes.h"
15 #include "llvm/CodeGen/RegAllocRegistry.h"
1516 #include "llvm/PassManager.h"
1617 #include "llvm/Support/CommandLine.h"
1718 #include "llvm/Support/TargetRegistry.h"
7273 cl::desc("Enable the condition optimizer pass"),
7374 cl::init(true), cl::Hidden);
7475
76 static cl::opt
77 EnablePBQP("aarch64-pbqp", cl::Hidden,
78 cl::desc("Use PBQP register allocator (experimental)"),
79 cl::init(false));
7580
7681 extern "C" void LLVMInitializeAArch64Target() {
7782 // Register the target.
8994 CodeGenOpt::Level OL,
9095 bool LittleEndian)
9196 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
92 Subtarget(TT, CPU, FS, *this, LittleEndian) {
97 Subtarget(TT, CPU, FS, *this, LittleEndian),
98 usingPBQP(false) {
9399 initAsmInfo();
100
101 if (EnablePBQP && Subtarget.isCortexA57() && OL != CodeGenOpt::None) {
102 usingPBQP = true;
103 RegisterRegAlloc::setDefault(createAArch64A57PBQPRegAlloc);
104 }
94105 }
95106
96107 void AArch64leTargetMachine::anchor() { }
215226 if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
216227 addPass(createAArch64DeadRegisterDefinitions());
217228 if (TM->getOptLevel() != CodeGenOpt::None &&
218 TM->getSubtarget().isCortexA57())
229 TM->getSubtarget().isCortexA57() &&
230 !static_cast(TM)->isPBQPUsed())
219231 // Improve performance for some FP/SIMD code for A57.
220232 addPass(createAArch64A57FPLoadBalancing());
221233 return true;
3939
4040 /// \brief Register AArch64 analysis passes with a pass manager.
4141 void addAnalysisPasses(PassManagerBase &PM) override;
42
43 /// \brief Query if the PBQP register allocator is being used
44 bool isPBQPUsed() const { return usingPBQP; }
45
46 private:
47 bool usingPBQP;
4248 };
4349
4450 // AArch64leTargetMachine - AArch64 little endian target machine.
3333 AArch64LoadStoreOptimizer.cpp
3434 AArch64MCInstLower.cpp
3535 AArch64PromoteConstant.cpp
36 AArch64PBQPRegAlloc.cpp
3637 AArch64RegisterInfo.cpp
3738 AArch64SelectionDAGInfo.cpp
3839 AArch64StorePairSuppress.cpp
0 ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -aarch64-pbqp -o - %s | FileCheck %s
1
2 define i32 @foo(i32 %a) {
3 ; CHECK-LABEL: foo:
4 ; CHECK: bl bar
5 ; CHECK-NEXT: bl baz
6 %call = call i32 @bar(i32 %a)
7 %call1 = call i32 @baz(i32 %call)
8 ret i32 %call1
9 }
10
11 declare i32 @bar(i32)
12 declare i32 @baz(i32)
13