llvm.org GIT mirror llvm / 554daa6
Be careful about scheduling nodes above previous calls: doing so can force the use of more callee-saved registers and introduce copies. Only allow it if scheduling a node above calls would reduce register pressure. Call operands also carry added ABI restrictions for register allocation, so be extra careful about hoisting them above calls.
rdar://9329627
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130245 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng, 8 years ago
8 changed files with 138 additions and 27 deletions.
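The heuristic in brief: when a node that computes a call operand competes with a call in the ready queue, its Sethi-Ullman priority is discounted by the number of values it defines, so it is scheduled above the call only when doing so is register-pressure neutral. Below is a minimal standalone sketch of that comparison; the types are simplified stand-ins, not the real SUnit/SDNode API:

// Sketch only: stand-in for SUnit carrying just the fields the heuristic reads.
struct SketchUnit {
  unsigned Priority;  // Sethi-Ullman number.
  unsigned NumValues; // Number of values the node defines.
  bool IsCall;        // Node is a function call.
  bool IsCallOp;      // Node computes a call operand.
};

// Mirrors the comparison added to the ready queue below: when a call
// operand is weighed against a call, discount the operand's priority by
// the number of values it defines, so hoisting it above the call only
// wins when net register pressure does not increase.
static bool preferLeft(const SketchUnit &L, const SketchUnit &R) {
  unsigned LP = L.Priority, RP = R.Priority;
  if (L.IsCall && R.IsCallOp)
    RP = (RP > R.NumValues) ? RP - R.NumValues : 0;
  if (R.IsCall && L.IsCallOp)
    LP = (LP > L.NumValues) ? LP - L.NumValues : 0;
  return LP > RP; // Higher adjusted priority schedules first.
}

In the patch itself this discount appears twice: once in getNodePriority (left disabled under #if 1) and once in the ready-queue comparison function.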
@@ -251,6 +251,7 @@
   unsigned short Latency; // Node latency.
   bool isVRegCycle : 1; // May use and def the same vreg.
   bool isCall : 1; // Is a function call.
+  bool isCallOp : 1; // Is a function call operand.
   bool isTwoAddress : 1; // Is a two-address instruction.
   bool isCommutable : 1; // Is a commutable instruction.
   bool hasPhysRegDefs : 1; // Has physreg defs that are being used.
@@ -279,7 +280,7 @@
     : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
       NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
       NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
-      isVRegCycle(false), isCall(false), isTwoAddress(false),
+      isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
       isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
       isPending(false), isAvailable(false), isScheduled(false),
       isScheduleHigh(false), isScheduleLow(false), isCloned(false),
@@ -293,7 +294,7 @@
     : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
       NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
       NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
-      isVRegCycle(false), isCall(false), isTwoAddress(false),
+      isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
       isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
       isPending(false), isAvailable(false), isScheduled(false),
       isScheduleHigh(false), isScheduleLow(false), isCloned(false),
@@ -306,7 +307,7 @@
     : Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
       NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
       NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
-      isVRegCycle(false), isCall(false), isTwoAddress(false),
+      isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
       isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
       isPending(false), isAvailable(false), isScheduled(false),
       isScheduleHigh(false), isScheduleLow(false), isCloned(false),
@@ -1731,7 +1731,17 @@
     // If SU does not have a register def, schedule it close to its uses
     // because it does not lengthen any live ranges.
     return 0;
+#if 1
   return SethiUllmanNumbers[SU->NodeNum];
+#else
+  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+  if (SU->isCallOp) {
+    // FIXME: This assumes all of the defs are used as call operands.
+    int NP = (int)Priority - SU->getNode()->getNumValues();
+    return (NP > 0) ? NP : 0;
+  }
+  return Priority;
+#endif
 }

 //===----------------------------------------------------------------------===//
@@ -2237,9 +2247,33 @@
   // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
   unsigned LPriority = SPQ->getNodePriority(left);
   unsigned RPriority = SPQ->getNodePriority(right);
+
+  // Be really careful about hoisting call operands above previous calls.
+  // Only allow it if it would reduce register pressure.
+  if (left->isCall && right->isCallOp) {
+    unsigned RNumVals = right->getNode()->getNumValues();
+    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+  }
+  if (right->isCall && left->isCallOp) {
+    unsigned LNumVals = left->getNode()->getNumValues();
+    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+  }
+
   if (LPriority != RPriority) {
     DEBUG(++FactorCount[FactStatic]);
     return LPriority > RPriority;
+  }
+
+  // If one or both of the nodes are calls and their Sethi-Ullman numbers
+  // are the same, keep the source order.
+  if (left->isCall || right->isCall) {
+    unsigned LOrder = SPQ->getNodeOrdering(left);
+    unsigned ROrder = SPQ->getNodeOrdering(right);
+
+    // Prefer the node with the lower non-zero source order number.
+    if ((LOrder || ROrder) && LOrder != ROrder)
+      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
   }

   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
@@ -2274,7 +2308,14 @@
     return LScratch > RScratch;
   }

-  if (!DisableSchedCycles) {
+  // Comparing latency against a call makes little sense unless the node
+  // is register-pressure neutral.
+  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+    return (left->NodeQueueId > right->NodeQueueId);
+
+  // Do not compare latencies when one or both of the nodes are calls.
+  if (!DisableSchedCycles &&
+      !(left->isCall || right->isCall)) {
     int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
     if (result != 0)
       return result > 0;
@@ -82,6 +82,7 @@
   SU->Latency = Old->Latency;
   SU->isVRegCycle = Old->isVRegCycle;
   SU->isCall = Old->isCall;
+  SU->isCallOp = Old->isCallOp;
   SU->isTwoAddress = Old->isTwoAddress;
   SU->isCommutable = Old->isCommutable;
   SU->hasPhysRegDefs = Old->hasPhysRegDefs;
@@ -284,6 +285,7 @@
   Worklist.push_back(DAG->getRoot().getNode());
   Visited.insert(DAG->getRoot().getNode());

+  SmallVector<SUnit*, 8> CallSUnits;
   while (!Worklist.empty()) {
     SDNode *NI = Worklist.pop_back_val();

@@ -336,6 +338,9 @@
       if (!HasGlueUse) break;
     }

+    if (NodeSUnit->isCall)
+      CallSUnits.push_back(NodeSUnit);
+
     // Schedule zero-latency TokenFactor below any nodes that may increase the
     // schedule height. Otherwise, ancestors of the TokenFactor may appear to
     // have false stalls.
@@ -354,6 +359,20 @@

     // Assign the Latency field of NodeSUnit using target-provided information.
     ComputeLatency(NodeSUnit);
+  }
+
+  // Find all call operands.
+  while (!CallSUnits.empty()) {
+    SUnit *SU = CallSUnits.pop_back_val();
+    for (const SDNode *SUNode = SU->getNode(); SUNode;
+         SUNode = SUNode->getGluedNode()) {
+      if (SUNode->getOpcode() != ISD::CopyToReg)
+        continue;
+      SDNode *SrcN = SUNode->getOperand(2).getNode();
+      if (isPassiveNode(SrcN)) continue; // Not scheduled.
+      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+      SrcSU->isCallOp = true;
+    }
   }
 }

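For context, the marking loop above relies on the DAG shape of calls: each register argument reaches the call through a CopyToReg node glued to it, and operand 2 of that CopyToReg is the node computing the argument, which is what gets flagged isCallOp. A rough standalone sketch of the same walk, using hypothetical stand-in types in place of SDNode/SUnit:

#include <vector>

// Sketch only: minimal stand-ins for the SDNode/SUnit fields used here.
struct SketchNode {
  bool IsCopyToReg = false;       // Plays the role of ISD::CopyToReg.
  SketchNode *Glued = nullptr;    // Next node in the glue chain (getGluedNode).
  SketchNode *ArgValue = nullptr; // Operand 2 of a CopyToReg: the argument.
  bool IsCallOp = false;
};

// For each call, follow its glue chain; every glued CopyToReg carries one
// register argument, and the node defining that argument is flagged as a
// call operand.
static void markCallOperands(std::vector<SketchNode *> &Calls) {
  for (SketchNode *Call : Calls)
    for (SketchNode *N = Call; N; N = N->Glued)
      if (N->IsCopyToReg && N->ArgValue)
        N->ArgValue->IsCallOp = true;
}

The real code additionally skips nodes that never get scheduled (isPassiveNode) before marking.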
New test (entire file added):

; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s

; Do not move the umull above the previous call, which would require more
; callee-saved registers and introduce copies.
; rdar://9329627

%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }

@FuncPtr = external hidden unnamed_addr global %struct.FF*
@.str1 = external hidden unnamed_addr constant [6 x i8], align 4
@G = external unnamed_addr global i32
@.str2 = external hidden unnamed_addr constant [58 x i8], align 4
@.str3 = external hidden unnamed_addr constant [58 x i8], align 4

define i32 @test() nounwind optsize ssp {
entry:
; CHECK: test:
; CHECK: push
; CHECK-NOT: push
  %block_size = alloca i32, align 4
  %block_count = alloca i32, align 4
  %index_cache = alloca i32, align 4
  store i32 0, i32* %index_cache, align 4
  %tmp = load i32* @G, align 4
  %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
  switch i32 %tmp1, label %bb8 [
    i32 0, label %bb
    i32 536870913, label %bb4
    i32 536870914, label %bb6
  ]

bb:
  %tmp2 = load i32* @G, align 4
  %tmp4 = icmp eq i32 %tmp2, 0
  br i1 %tmp4, label %bb1, label %bb8

bb1:
; CHECK: %bb1
; CHECK-NOT: umull
; CHECK: blx _Get
; CHECK: umull
; CHECK: blx _foo
  %tmp5 = load i32* %block_size, align 4
  %tmp6 = load i32* %block_count, align 4
  %tmp7 = call %struct.FF* @Get() nounwind
  store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
  %tmp10 = zext i32 %tmp6 to i64
  %tmp11 = zext i32 %tmp5 to i64
  %tmp12 = mul nsw i64 %tmp10, %tmp11
  %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
  br label %bb8

bb4:
  ret i32 0

bb6:
  ret i32 1

bb8:
  ret i32 -1
}

declare i32 @printf(i8*, ...)

declare %struct.FF* @Get()

declare i32 @foo(i8*, i64, i32)

declare i32 @bar(i32, i32, i32)
@@ -57,7 +57,7 @@
 ; A8: str r2, [r0, r1, lsl #2]

 ; A9: test4:
-; A9: add r0, r0, r4, lsl #2
+; A9: add r0, r0, r{{[0-9]+}}, lsl #2
 ; A9: ldr r1, [r0]
 ; A9: str r1, [r0]
 %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll (deleted, 0 additions, 20 deletions):

; RUN: llc < %s -regalloc=fast -relocation-model=pic | FileCheck %s

target triple = "thumbv6-apple-darwin10"

@fred = internal global i32 0 ; <i32*> [#uses=1]

define void @foo() nounwind {
entry:
; CHECK: str r0, [sp
  %0 = call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
; CHECK: blx _bar
; CHECK: ldr r1, [sp
  store i32 %0, i32* @fred, align 4
  br label %return

return: ; preds = %entry
  ret void
}

declare i32 @bar(...)
@@ -12,7 +12,7 @@
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK: it eq
 ; CHECK-NEXT: subeq r0, r{{[0-9]+}}, r{{[0-9]+}}
-; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: ldmia.w sp!,
 entry:
   %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
   %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
@@ -78,8 +78,8 @@
 ; LINUX-NEXT: .L3$pb:
 ; LINUX: popl
 ; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]]
+; LINUX: calll afoo@PLT
 ; LINUX: movl pfoo@GOT(%[[REG3]]),
-; LINUX: calll afoo@PLT
 ; LINUX: calll *
 }
