llvm.org GIT mirror llvm / 5248468
Enable cross register class coalescing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@76281 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 11 years ago
10 changed file(s) with 198 addition(s) and 19 deletion(s). Raw diff Collapse all Expand all
483483 return 0;
484484 }
485485
486 /// getMatchingSuperRegClass - Return a subclass of the specified register
487 /// class A so that each register in it has a sub-register of the
488 /// specified sub-register index which is in the specified register class B.
489 virtual const TargetRegisterClass *
490 getMatchingSuperRegClass(const TargetRegisterClass *A,
491 const TargetRegisterClass *B, unsigned Idx) const {
492 return 0;
493 }
494
486495 //===--------------------------------------------------------------------===//
487496 // Register Class Information
488497 //
5858 static cl::opt
5959 CrossClassJoin("join-cross-class-copies",
6060 cl::desc("Coalesce cross register class copies"),
61 cl::init(false), cl::Hidden);
61 cl::init(true), cl::Hidden);
6262
6363 static cl::opt
6464 PhysJoinTweak("tweak-phys-join-heuristics",
13071307
13081308 // Should be non-null only when coalescing to a sub-register class.
13091309 bool CrossRC = false;
1310 const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
1311 const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
13101312 const TargetRegisterClass *NewRC = NULL;
13111313 MachineBasicBlock *CopyMBB = CopyMI->getParent();
13121314 unsigned RealDstReg = 0;
13721374 }
13731375 }
13741376 if (SubIdx) {
1377 if (isInsSubReg || isSubRegToReg) {
1378 if (!DstIsPhys && !SrcIsPhys) {
1379 NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx);
1380 if (!NewRC)
1381 return false;
1382 }
1383 }
13751384 unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
13761385 unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
13771386 unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
14231432 }
14241433 }
14251434
1426 const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
1427 const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
14281435 unsigned LargeReg = SrcReg;
14291436 unsigned SmallReg = DstReg;
1430 unsigned Limit = 0;
14311437
14321438 // Now determine the register class of the joined register.
14331439 if (isExtSubReg) {
14381444 Again = true;
14391445 return false;
14401446 }
1441 Limit = allocatableRCRegs_[DstRC].count();
1447 if (!DstIsPhys && !SrcIsPhys)
1448 NewRC = SrcRC;
14421449 } else if (!SrcIsPhys && !DstIsPhys) {
14431450 NewRC = getCommonSubClass(SrcRC, DstRC);
14441451 if (!NewRC) {
16421649
16431650 // Coalescing to a virtual register that is of a sub-register class of the
16441651 // other. Make sure the resulting register is set to the right register class.
1645 if (CrossRC) {
1646 ++numCrossRCs;
1647 if (NewRC)
1648 mri_->setRegClass(DstReg, NewRC);
1649 }
1652 if (CrossRC)
1653 ++numCrossRCs;
1654
1655 // This may happen even if it's cross-rc coalescing. e.g.
1656 // %reg1026 = SUBREG_TO_REG 0, %reg1037, 4
1657 // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to
1658 // be allocated a register from GR64_ABCD.
1659 if (NewRC)
1660 mri_->setRegClass(DstReg, NewRC);
16501661
16511662 if (NewHeuristic) {
16521663 // Add all copies that define val# in the source interval into the queue.
13431343 ++NumStores;
13441344 }
13451345
1346 /// isSafeToDelete - Return true if this instruction doesn't produce any side
1347 /// effect and all of its defs are dead.
1348 static bool isSafeToDelete(MachineInstr &MI) {
1349 const TargetInstrDesc &TID = MI.getDesc();
1350 if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
1351 TID.isCall() || TID.isBarrier() || TID.isReturn() ||
1352 TID.hasUnmodeledSideEffects())
1353 return false;
1354 if (TID.getImplicitDefs())
1355 return false;
1356 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1357 MachineOperand &MO = MI.getOperand(i);
1358 if (!MO.isReg() || !MO.getReg())
1359 continue;
1360 if (MO.isDef() && !MO.isDead())
1361 return false;
1362 if (MO.isUse() && MO.isKill())
1363 // FIXME: We can't remove kill markers or else the scavenger will assert.
1364 // An alternative is to add a ADD pseudo instruction to replace kill
1365 // markers.
1366 return false;
1367 }
1368 return true;
1369 }
1370
13461371 /// TransferDeadness - An identity copy definition is dead and it's being
13471372 /// removed. Find the last def or use and mark it as dead / kill.
13481373 void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
13841409 if (LastUD->isDef()) {
13851410 // If the instruction has no side effect, delete it and propagate
13861411 // backward further. Otherwise, mark it as dead and we are done.
1387 const TargetInstrDesc &TID = LastUDMI->getDesc();
1388 if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
1389 TID.hasUnmodeledSideEffects()) {
1412 if (!isSafeToDelete(*LastUDMI)) {
13901413 LastUD->setIsDead();
13911414 break;
13921415 }
21692192 }
21702193 }
21712194 ProcessNextInst:
2172 DistanceMap.insert(std::make_pair(&MI, Dist++));
2195 // Delete dead instructions without side effects.
2196 if (!Erased && !BackTracked && isSafeToDelete(MI)) {
2197 InvalidateKills(MI, TRI, RegKills, KillOps);
2198 VRM.RemoveMachineInstrFromMaps(&MI);
2199 MBB.erase(&MI);
2200 Erased = true;
2201 }
2202 if (!Erased)
2203 DistanceMap.insert(std::make_pair(&MI, Dist++));
21732204 if (!Erased && !BackTracked) {
21742205 for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
21752206 UpdateKills(*II, TRI, RegKills, KillOps);
149149 llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
150150 return 0;
151151 }
152 }
153
154 const TargetRegisterClass *
155 X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
156 const TargetRegisterClass *B,
157 unsigned SubIdx) const {
158 switch (SubIdx) {
159 default: return 0;
160 case 1:
161 // 8-bit
162 if (B == &X86::GR8RegClass) {
163 if (A == &X86::GR64RegClass)
164 return &X86::GR64RegClass;
165 else if (A == &X86::GR32RegClass)
166 return &X86::GR32RegClass;
167 else if (A == &X86::GR16RegClass)
168 return &X86::GR16RegClass;
169 } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
170 if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
171 return &X86::GR64_ABCDRegClass;
172 else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass)
173 return &X86::GR32_ABCDRegClass;
174 else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass)
175 return &X86::GR16_ABCDRegClass;
176 } else if (B == &X86::GR8_NOREXRegClass) {
177 if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
178 return &X86::GR64_NOREXRegClass;
179 else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass)
180 return &X86::GR32_NOREXRegClass;
181 else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
182 return &X86::GR16_NOREXRegClass;
183 }
184 break;
185 case 2:
186 // 8-bit hi
187 if (B == &X86::GR8_ABCD_HRegClass) {
188 if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
189 return &X86::GR64_ABCDRegClass;
190 else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass)
191 return &X86::GR32_ABCDRegClass;
192 else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass)
193 return &X86::GR16_ABCDRegClass;
194 }
195 break;
196 case 3:
197 // 16-bit
198 if (B == &X86::GR16RegClass) {
199 if (A == &X86::GR64RegClass)
200 return &X86::GR64RegClass;
201 else if (A == &X86::GR32RegClass)
202 return &X86::GR32RegClass;
203 } else if (B == &X86::GR16_ABCDRegClass) {
204 if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
205 return &X86::GR64_ABCDRegClass;
206 else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass)
207 return &X86::GR32_ABCDRegClass;
208 } else if (B == &X86::GR16_NOREXRegClass) {
209 if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
210 return &X86::GR64_NOREXRegClass;
211 else if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
212 return &X86::GR64_ABCDRegClass;
213 }
214 break;
215 case 4:
216 // 32-bit
217 if (B == &X86::GR32RegClass) {
218 if (A == &X86::GR64RegClass)
219 return &X86::GR64RegClass;
220 } else if (B == &X86::GR32_ABCDRegClass) {
221 if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
222 return &X86::GR64_ABCDRegClass;
223 } else if (B == &X86::GR32_NOREXRegClass) {
224 if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
225 return &X86::GR64_NOREXRegClass;
226 }
227 break;
228 }
229 return 0;
152230 }
153231
154232 const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const {
9292 /// Code Generation virtual methods...
9393 ///
9494
95 /// getMatchingSuperRegClass - Return a subclass of the specified register
96 /// class A so that each register in it has a sub-register of the
97 /// specified sub-register index which is in the specified register class B.
98 virtual const TargetRegisterClass *
99 getMatchingSuperRegClass(const TargetRegisterClass *A,
100 const TargetRegisterClass *B, unsigned Idx) const;
101
95102 /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
96103 /// values.
97104 const TargetRegisterClass *getPointerRegClass() const;
None ; RUN: llvm-as < %s | llc | grep 328
0 ; RUN: llvm-as < %s | llc | grep 168
11
22 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
33 target triple = "s390x-linux"
None ; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 1
0 ; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 2
1 ; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | not grep movb
12
23 %struct.double_int = type { i64, i64 }
34 %struct.tree_common = type <{ i8, [3 x i8] }>
56 %struct.tree_node = type { %struct.tree_int_cst }
67 @tree_code_type = external constant [0 x i32] ; <[0 x i32]*> [#uses=1]
78
8 define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) {
9 define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind {
910 entry:
1011 %tmp2526 = bitcast %struct.tree_node* %t1 to i32* ; [#uses=1]
1112 br i1 false, label %UnifiedReturnBlock, label %bb21
0 ; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | not grep movaps
1 ; rdar://6509240
2
3 type { %struct.TValue } ; type %0
4 type { %struct.L_Umaxalign, i32, %struct.Node* } ; type %1
5 %struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 }
6 %struct.GCObject = type { %struct.lua_State }
7 %struct.L_Umaxalign = type { double }
8 %struct.Mbuffer = type { i8*, i32, i32 }
9 %struct.Node = type { %struct.TValue, %struct.TKey }
10 %struct.TKey = type { %1 }
11 %struct.TString = type { %struct.anon }
12 %struct.TValue = type { %struct.L_Umaxalign, i32 }
13 %struct.Table = type { %struct.GCObject*, i8, i8, i8, i8, %struct.Table*, %struct.TValue*, %struct.Node*, %struct.Node*, %struct.GCObject*, i32 }
14 %struct.UpVal = type { %struct.GCObject*, i8, i8, %struct.TValue*, %0 }
15 %struct.anon = type { %struct.GCObject*, i8, i8, i8, i32, i32 }
16 %struct.global_State = type { %struct.stringtable, i8* (i8*, i8*, i32, i32)*, i8*, i8, i8, i32, %struct.GCObject*, %struct.GCObject**, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.Mbuffer, i32, i32, i32, i32, i32, i32, i32 (%struct.lua_State*)*, %struct.TValue, %struct.lua_State*, %struct.UpVal, [9 x %struct.Table*], [17 x %struct.TString*] }
17 %struct.lua_Debug = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
18 %struct.lua_State = type { %struct.GCObject*, i8, i8, i8, %struct.TValue*, %struct.TValue*, %struct.global_State*, %struct.CallInfo*, i32*, %struct.TValue*, %struct.TValue*, %struct.CallInfo*, %struct.CallInfo*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State*, %struct.lua_Debug*)*, %struct.TValue, %struct.TValue, %struct.GCObject*, %struct.GCObject*, %struct.lua_longjmp*, i32 }
19 %struct.lua_longjmp = type { %struct.lua_longjmp*, [18 x i32], i32 }
20 %struct.stringtable = type { %struct.GCObject**, i32, i32 }
21 @llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.lua_State*)* @os_clock to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
22
23 define i32 @os_clock(%struct.lua_State* nocapture %L) nounwind ssp {
24 entry:
25 %0 = tail call i32 @"\01_clock$UNIX2003"() nounwind ; [#uses=1]
26 %1 = uitofp i32 %0 to double ; [#uses=1]
27 %2 = fdiv double %1, 1.000000e+06 ; [#uses=1]
28 %3 = getelementptr %struct.lua_State* %L, i32 0, i32 4 ; <%struct.TValue**> [#uses=3]
29 %4 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=2]
30 %5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0 ; [#uses=1]
31 store double %2, double* %5, align 4
32 %6 = getelementptr %struct.TValue* %4, i32 0, i32 1 ; [#uses=1]
33 store i32 3, i32* %6, align 4
34 %7 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=1]
35 %8 = getelementptr %struct.TValue* %7, i32 1 ; <%struct.TValue*> [#uses=1]
36 store %struct.TValue* %8, %struct.TValue** %3, align 4
37 ret i32 1
38 }
39
40 declare i32 @"\01_clock$UNIX2003"()
0 ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
11 ; RUN: grep stackcoloring %t | grep "loads eliminated"
2 ; RUN: grep stackcoloring %t | grep "stores eliminated"
2 ; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
3 ; RUN: grep asm-printer %t | grep 176
34
45 type { [62 x %struct.Bitvec*] } ; type %0
56 type { i8* } ; type %1
None ; RUN: llvm-as < %s | llc | grep {movq %rdi, %rax}
0 ; RUN: llvm-as < %s | llc
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
33 target triple = "x86_64-apple-darwin8"