llvm.org GIT mirror llvm / f6a4d3c
Avoid write-after-write issue hazards for Cortex-A9. Add an avoidWriteAfterWrite() target hook to identify register classes that suffer from write-after-write hazards. For those register classes, try to avoid writing the same register in two consecutive instructions. This is currently disabled by default. We should not spill to avoid hazards! The command line flag -avoid-waw-hazard can be used to enable waw avoidance. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129772 91177308-0d34-0410-b5e6-96231b3b80d8 Bob Wilson 8 years ago
9 changed file(s) with 65 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
623623 return 0;
624624 }
625625
626 /// avoidWriteAfterWrite - Return true if the register allocator should avoid
627 /// writing a register from RC in two consecutive instructions.
628 /// This can avoid pipeline stalls on certain architectures.
629 /// It does cause increased register pressure, though.
630 virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
631 return false;
632 }
633
626634 /// UpdateRegAllocHint - A callback to allow target a chance to update
627635 /// register allocation hints when a register is "changed" (e.g. coalesced)
628636 /// to another register. e.g. On ARM, some virtual registers should target
6666 cl::desc("Attempt trivial coalescing of interval ends"),
6767 cl::init(false), cl::Hidden);
6868
69 static cl::opt
70 AvoidWAWHazard("avoid-waw-hazard",
71 cl::desc("Avoid write-write hazards for some register classes"),
72 cl::init(false), cl::Hidden);
73
6974 static RegisterRegAlloc
7075 linearscanRegAlloc("linearscan", "linear scan register allocator",
7176 createLinearScanRegisterAllocator);
109114 if (NumRecentlyUsedRegs > 0)
110115 RecentRegs.resize(NumRecentlyUsedRegs, 0);
111116 RecentNext = RecentRegs.begin();
117 avoidWAW_ = 0;
112118 }
113119
114120 typedef std::pair IntervalPtr;
178184 // The queue of recently-used registers.
179185 SmallVector RecentRegs;
180186 SmallVector::iterator RecentNext;
187
188 // Last write-after-write register written.
189 unsigned avoidWAW_;
181190
182191 // Record that we just picked this register.
183192 void recordRecentlyUsed(unsigned reg) {
226235
227236 // Determine if we skip this register due to its being recently used.
228237 bool isRecentlyUsed(unsigned reg) const {
229 return std::find(RecentRegs.begin(), RecentRegs.end(), reg) !=
230 RecentRegs.end();
238 return reg == avoidWAW_ ||
239 std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end();
231240 }
232241
233242 private:
11151124 active_.push_back(std::make_pair(cur, cur->begin()));
11161125 handled_.push_back(cur);
11171126
1127 // Remember physReg for avoiding a write-after-write hazard in the next
1128 // instruction.
1129 if (AvoidWAWHazard &&
1130 tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg)))
1131 avoidWAW_ = physReg;
1132
11181133 // "Upgrade" the physical register since it has been allocated.
11191134 UpgradeRegister(physReg);
11201135 if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
14451460 if (reservedRegs_.test(Reg))
14461461 continue;
14471462 // Skip recently allocated registers.
1448 if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
1463 if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) {
14491464 FreeReg = Reg;
14501465 if (FreeReg < inactiveCounts.size())
14511466 FreeRegInactiveCount = inactiveCounts[FreeReg];
14761491 if (reservedRegs_.test(Reg))
14771492 continue;
14781493 if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
1479 FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
1494 FreeRegInactiveCount < inactiveCounts[Reg] &&
1495 (!SkipDGRegs || !isRecentlyUsed(Reg))) {
14801496 FreeReg = Reg;
14811497 FreeRegInactiveCount = inactiveCounts[Reg];
14821498 if (FreeRegInactiveCount == MaxInactiveCount)
15271543 return Preference;
15281544 }
15291545
1530 if (!DowngradedRegs.empty()) {
1531 unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
1532 true);
1533 if (FreeReg)
1534 return FreeReg;
1535 }
1546 unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
1547 true);
1548 if (FreeReg)
1549 return FreeReg;
15361550 return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
15371551 }
15381552
553553 }
554554 }
555555
556 bool
557 ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
558 // CortexA9 has a Write-after-write hazard for NEON registers.
559 if (!STI.isCortexA9())
560 return false;
561
562 switch (RC->getID()) {
563 case ARM::DPRRegClassID:
564 case ARM::DPR_8RegClassID:
565 case ARM::DPR_VFP2RegClassID:
566 case ARM::QPRRegClassID:
567 case ARM::QPR_8RegClassID:
568 case ARM::QPR_VFP2RegClassID:
569 case ARM::SPRRegClassID:
570 case ARM::SPR_8RegClassID:
571 // Avoid reusing S, D, and Q registers.
572 // Don't increase register pressure for QQ and QQQQ.
573 return true;
574 default:
575 return false;
576 }
577 }
578
556579 bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
557580 const MachineFrameInfo *MFI = MF.getFrameInfo();
558581 const ARMFunctionInfo *AFI = MF.getInfo();
140140
141141 void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
142142 MachineFunction &MF) const;
143
144 virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
143145
144146 bool hasBasePointer(const MachineFunction &MF) const;
145147
2323 ; CORTEXA8: test:
2424 ; CORTEXA8: vabs.f32 d1, d1
2525 ; CORTEXA9: test:
26 ; CORTEXA9: vabs.f32 s1, s1
26 ; CORTEXA9: vabs.f32 s{{.}}, s{{.}}
1919 ; CORTEXA8: test:
2020 ; CORTEXA8: vadd.f32 d0, d1, d0
2121 ; CORTEXA9: test:
22 ; CORTEXA9: vadd.f32 s0, s1, s0
22 ; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}}
1919 ; CORTEXA8: test:
2020 ; CORTEXA8: vdiv.f32 s0, s1, s0
2121 ; CORTEXA9: test:
22 ; CORTEXA9: vdiv.f32 s0, s1, s0
22 ; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}}
1919 ; CORTEXA8: test:
2020 ; CORTEXA8: vmul.f32 d0, d1, d0
2121 ; CORTEXA9: test:
22 ; CORTEXA9: vmul.f32 s0, s1, s0
22 ; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
44
55 define i32 @test1(float %a, float %b) {
66 ; VFP2: test1:
7 ; VFP2: vcvt.s32.f32 s0, s0
7 ; VFP2: vcvt.s32.f32 s{{.}}, s{{.}}
88 ; NEON: test1:
99 ; NEON: vcvt.s32.f32 d0, d0
1010 entry:
1515
1616 define i32 @test2(float %a, float %b) {
1717 ; VFP2: test2:
18 ; VFP2: vcvt.u32.f32 s0, s0
18 ; VFP2: vcvt.u32.f32 s{{.}}, s{{.}}
1919 ; NEON: test2:
2020 ; NEON: vcvt.u32.f32 d0, d0
2121 entry:
2626
2727 define float @test3(i32 %a, i32 %b) {
2828 ; VFP2: test3:
29 ; VFP2: vcvt.f32.u32 s0, s0
29 ; VFP2: vcvt.f32.u32 s{{.}}, s{{.}}
3030 ; NEON: test3:
3131 ; NEON: vcvt.f32.u32 d0, d0
3232 entry:
3737
3838 define float @test4(i32 %a, i32 %b) {
3939 ; VFP2: test4:
40 ; VFP2: vcvt.f32.s32 s0, s0
40 ; VFP2: vcvt.f32.s32 s{{.}}, s{{.}}
4141 ; NEON: test4:
4242 ; NEON: vcvt.f32.s32 d0, d0
4343 entry: