llvm.org GIT mirror llvm / 4a74b3b
Inflate register classes after coalescing. Coalescing can remove copy-like instructions with sub-register operands that constrained the register class. Examples are: x86: GR32_ABCD:sub_8bit_hi -> GR32 arm: DPR_VFP2:ssub0 -> DPR Recompute the register class of any virtual registers that are used by fewer instructions after coalescing. This affects code generation for the Cortex-A8 where we use NEON instructions for f32 operations, cf. fp_convert.ll: vadd.f32 d16, d1, d0 vcvt.s32.f32 d0, d16 The register allocator is now free to use d16 for the temporary, and that comes first in the allocation order because it doesn't interfere with any s-registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137133 91177308-0d34-0410-b5e6-96231b3b80d8 Jakob Stoklund Olesen 8 years ago
3 changed file(s) with 42 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
5454 STATISTIC(NumReMats , "Number of instructions re-materialized");
5555 STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
5656 STATISTIC(numAborts , "Number of times interval joining aborted");
57 STATISTIC(NumInflated , "Number of register classes inflated");
5758
5859 static cl::opt
5960 EnableJoining("join-liveintervals",
18511852
18521853 // Perform a final pass over the instructions and compute spill weights
18531854 // and remove identity moves.
1854 SmallVector DeadDefs;
1855 SmallVector DeadDefs, InflateRegs;
18551856 for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
18561857 mbbi != mbbe; ++mbbi) {
18571858 MachineBasicBlock* mbb = mbbi;
18631864 bool DoDelete = true;
18641865 assert(MI->isCopyLike() && "Unrecognized copy instruction");
18651866 unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
1867 unsigned DstReg = MI->getOperand(0).getReg();
1868
1869 // Collect candidates for register class inflation.
1870 if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
1871 RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
1872 InflateRegs.push_back(SrcReg);
1873 if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
1874 RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
1875 InflateRegs.push_back(DstReg);
1876
18661877 if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
18671878 MI->getNumOperands() > 2)
18681879 // Do not delete extract_subreg, insert_subreg of physical
19041915 unsigned Reg = MO.getReg();
19051916 if (!Reg)
19061917 continue;
1907 if (TargetRegisterInfo::isVirtualRegister(Reg))
1918 if (TargetRegisterInfo::isVirtualRegister(Reg)) {
19081919 DeadDefs.push_back(Reg);
1920 // Remat may also enable register class inflation.
1921 if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
1922 InflateRegs.push_back(Reg);
1923 }
19091924 if (MO.isDead())
19101925 continue;
19111926 if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
19531968 }
19541969 }
19551970
1971 // After deleting a lot of copies, register classes may be less constrained.
1972 // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
1973 // DPR inflation.
1974 array_pod_sort(InflateRegs.begin(), InflateRegs.end());
1975 InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
1976 InflateRegs.end());
1977 DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
1978 for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
1979 unsigned Reg = InflateRegs[i];
1980 if (MRI->reg_nodbg_empty(Reg))
1981 continue;
1982 if (MRI->recomputeRegClass(Reg, *TM)) {
1983 DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
1984 << MRI->getRegClass(Reg)->getName() << '\n');
1985 ++NumInflated;
1986 }
1987 }
1988
19561989 DEBUG(dump());
19571990 DEBUG(LDV->dump());
19581991 if (VerifyCoalescing)
2121 ; NFP0: vabs.f32 s1, s1
2222
2323 ; CORTEXA8: test:
24 ; CORTEXA8: vabs.f32 d1, d1
24 ; CORTEXA8: vadd.f32 [[D1:d[0-9]+]]
25 ; CORTEXA8: vabs.f32 {{d[0-9]+}}, [[D1]]
26
2527 ; CORTEXA9: test:
2628 ; CORTEXA9: vabs.f32 s{{.}}, s{{.}}
66 ; VFP2: test1:
77 ; VFP2: vcvt.s32.f32 s{{.}}, s{{.}}
88 ; NEON: test1:
9 ; NEON: vcvt.s32.f32 d0, d0
9 ; NEON: vadd.f32 [[D0:d[0-9]+]]
10 ; NEON: vcvt.s32.f32 d0, [[D0]]
1011 entry:
1112 %0 = fadd float %a, %b
1213 %1 = fptosi float %0 to i32
1718 ; VFP2: test2:
1819 ; VFP2: vcvt.u32.f32 s{{.}}, s{{.}}
1920 ; NEON: test2:
20 ; NEON: vcvt.u32.f32 d0, d0
21 ; NEON: vadd.f32 [[D0:d[0-9]+]]
22 ; NEON: vcvt.u32.f32 d0, [[D0]]
2123 entry:
2224 %0 = fadd float %a, %b
2325 %1 = fptoui float %0 to i32