llvm.org GIT mirror llvm / aa0cfea
Don't fold indexed loads into TCRETURNmi64. We don't have enough GR64_TC registers when calling a varargs function with 6 arguments. Since %al holds the number of vector registers used, only %r11 is available as a scratch register. This means that addressing modes using both base and index registers can't be folded into TCRETURNmi64. <rdar://problem/12282281> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163761 91177308-0d34-0410-b5e6-96231b3b80d8 Jakob Stoklund Olesen 7 years ago
4 changed file(s) with 74 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
203203 bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
204204 SDValue &Scale, SDValue &Index, SDValue &Disp,
205205 SDValue &Segment);
206 bool SelectSingleRegAddr(SDNode *Parent, SDValue N, SDValue &Base,
207 SDValue &Scale, SDValue &Index, SDValue &Disp,
208 SDValue &Segment);
206209 bool SelectLEAAddr(SDValue N, SDValue &Base,
207210 SDValue &Scale, SDValue &Index, SDValue &Disp,
208211 SDValue &Segment);
13161319
13171320 getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
13181321 return true;
1322 }
1323
1324 /// SelectSingleRegAddr - Like SelectAddr, but reject any address that would
1325 /// require more than one allocatable register.
1326 ///
1327 /// This is used for a TCRETURNmi64 instruction when used to tail call a
1328 /// variadic function with 6 arguments: Only %r11 is available from GR64_TC.
1329 /// The other scratch register, %rax, is needed to pass in the number of vector
1330 /// registers used in the variadic arguments.
1331 ///
1332 bool X86DAGToDAGISel::SelectSingleRegAddr(SDNode *Parent, SDValue N,
1333 SDValue &Base,
1334 SDValue &Scale, SDValue &Index,
1335 SDValue &Disp, SDValue &Segment) {
1336 if (!SelectAddr(Parent, N, Base, Scale, Index, Disp, Segment))
1337 return false;
1338 // Anything %RIP relative is fine.
1339 if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Base))
1340 if (Reg->getReg() == X86::RIP)
1341 return true;
1342 // Check that the index register is 0.
1343 if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Index))
1344 if (Reg->getReg() == 0)
1345 return true;
1346 return false;
13191347 }
13201348
13211349 /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
10401040 (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
10411041 Requires<[In64BitMode]>;
10421042
1043 def : Pat<(X86tcret (load addr:$dst), imm:$off),
1043 // When calling a variadic function with 6 arguments, 7 scratch registers are
1044 // needed since %al holds the number of vector registers used. That leaves %r11
1045 // as the only remaining GR64_TC register for the addressing mode.
1046 //
1047 // The single_reg_addr pattern rejects any addressing modes that would need
1048 // more than one register.
1049 def : Pat<(X86tcret (load single_reg_addr:$dst), imm:$off),
10441050 (TCRETURNmi64 addr:$dst, imm:$off)>,
10451051 Requires<[In64BitMode]>;
10461052
541541
542542 def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
543543 [tglobaltlsaddr], []>;
544
545 // Same as addr, but reject addressing modes requiring more than one register.
546 def single_reg_addr : ComplexPattern<iPTR, 5, "SelectSingleRegAddr", [],
547 [SDNPWantParent]>;
544548
545549 //===----------------------------------------------------------------------===//
546550 // X86 Instruction Predicate Definitions.
None ; RUN: llc < %s | FileCheck %s
0 ; RUN: llc < %s -verify-machineinstrs | FileCheck %s
11 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
22 target triple = "x86_64-apple-darwin11.4.0"
33
9292 ret { i64, i64 } %mrv7
9393 }
9494
95 ; Fold an indexed load into the tail call instruction.
96 ; Calling a varargs function with 6 arguments requires 7 registers (%al is the
97 ; vector count for varargs functions). This leaves %r11 as the only available
98 ; scratch register.
99 ;
100 ; It is not possible to fold an indexed load into TCRETURNmi64 in that case.
101 ;
102 ; typedef int (*funcptr)(void*, ...);
103 ; extern const funcptr funcs[];
104 ; int f(int n) {
105 ; return funcs[n](0, 0, 0, 0, 0, 0);
106 ; }
107 ;
108 ; CHECK: rdar12282281
109 ; CHECK: jmpq *%r11 # TAILCALL
110 @funcs = external constant [0 x i32 (i8*, ...)*]
95111
112 define i32 @rdar12282281(i32 %n) nounwind uwtable ssp {
113 entry:
114 %idxprom = sext i32 %n to i64
115 %arrayidx = getelementptr inbounds [0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 %idxprom
116 %0 = load i32 (i8*, ...)** %arrayidx, align 8
117 %call = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
118 ret i32 %call
119 }
120
121 ; Same thing, using a fixed offset. The load should fold.
122 ; CHECK: rdar12282281fixed
123 ; CHECK: jmpq *8(%r11) # TAILCALL
124 define i32 @rdar12282281fixed() nounwind uwtable ssp {
125 entry:
126 %0 = load i32 (i8*, ...)** getelementptr inbounds ([0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 1), align 8
127 %call.i = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
128 ret i32 %call.i
129 }