llvm.org GIT mirror llvm / 4594a61
Merging r325049:
------------------------------------------------------------------------
r325049 | rnk | 2018-02-13 12:47:49 -0800 (Tue, 13 Feb 2018) | 17 lines

[X86] Use EDI for retpoline when no scratch regs are left

Summary:
Instead of solving the hard problem of how to pass the callee to the indirect jump thunk without a register, just use a CSR. At a call boundary, there's nothing stopping us from using a CSR to hold the callee as long as we save and restore it in the prologue.

Also, add tests for this mregparm=3 case. I wrote execution tests for __llvm_retpoline_push, but they never got committed as lit tests, either because I never rewrote them or because they got lost in merge conflicts.

Reviewers: chandlerc, dwmw2

Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D43214
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325090 91177308-0d34-0410-b5e6-96231b3b80d8

Reid Kleckner 1 year, 6 months ago
4 changed file(s) with 76 addition(s) and 72 deletion(s). Raw diff Collapse all Expand all
2626426264 // attempt to help out kernels and other systems where duplicating the
2626526265 // thunks is costly.
2626626266 switch (Reg) {
26267 case 0:
26268 assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
26269 return "__x86_indirect_thunk";
2627026267 case X86::EAX:
2627126268 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2627226269 return "__x86_indirect_thunk_eax";
2627626273 case X86::EDX:
2627726274 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2627826275 return "__x86_indirect_thunk_edx";
26276 case X86::EDI:
26277 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26278 return "__x86_indirect_thunk_edi";
2627926279 case X86::R11:
2628026280 assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
2628126281 return "__x86_indirect_thunk_r11";
2628526285
2628626286 // When targeting an internal COMDAT thunk use an LLVM-specific name.
2628726287 switch (Reg) {
26288 case 0:
26289 assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
26290 return "__llvm_retpoline_push";
2629126288 case X86::EAX:
2629226289 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2629326290 return "__llvm_retpoline_eax";
2629726294 case X86::EDX:
2629826295 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2629926296 return "__llvm_retpoline_edx";
26297 case X86::EDI:
26298 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26299 return "__llvm_retpoline_edi";
2630026300 case X86::R11:
2630126301 assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
2630226302 return "__llvm_retpoline_r11";
2631826318 // just use R11, but we scan for uses anyway to ensure we don't generate
2631926319 // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
2632026320 // already a register use operand to the call to hold the callee. If none
26321 // are available, push the callee instead. This is less efficient, but is
26322 // necessary for functions using 3 regparms. Such function calls are
26323 // (currently) not eligible for tail call optimization, because there is no
26324 // scratch register available to hold the address of the callee.
26321 // are available, use EDI instead. EDI is chosen because EBX is the PIC base
26322 // register and ESI is the base pointer to realigned stack frames with VLAs.
2632526323 SmallVector AvailableRegs;
2632626324 if (Subtarget.is64Bit())
2632726325 AvailableRegs.push_back(X86::R11);
2632826326 else
26329 AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
26327 AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
2633026328
2633126329 // Zero out any registers that are already used.
2633226330 for (const auto &MO : MI.operands()) {
2634426342 break;
2634526343 }
2634626344 }
26345 if (!AvailableReg)
26346 report_fatal_error("calling convention incompatible with retpoline, no "
26347 "available registers");
2634726348
2634826349 const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
2634926350
26350 if (AvailableReg == 0) {
26351 // No register available. Use PUSH. This must not be a tailcall, and this
26352 // must not be x64.
26353 if (Subtarget.is64Bit())
26354 report_fatal_error(
26355 "Cannot make an indirect call on x86-64 using both retpoline and a "
26356 "calling convention that preservers r11");
26357 if (Opc != X86::CALLpcrel32)
26358 report_fatal_error("Cannot make an indirect tail call on x86 using "
26359 "retpoline without a preserved register");
26360 BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
26361 MI.getOperand(0).ChangeToES(Symbol);
26362 MI.setDesc(TII->get(Opc));
26363 } else {
26364 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26365 .addReg(CalleeVReg);
26366 MI.getOperand(0).ChangeToES(Symbol);
26367 MI.setDesc(TII->get(Opc));
26368 MachineInstrBuilder(*BB->getParent(), &MI)
26369 .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
26370 }
26351 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26352 .addReg(CalleeVReg);
26353 MI.getOperand(0).ChangeToES(Symbol);
26354 MI.setDesc(TII->get(Opc));
26355 MachineInstrBuilder(*BB->getParent(), &MI)
26356 .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
2637126357 return BB;
2637226358 }
2637326359
4242 static const char EAXThunkName[] = "__llvm_retpoline_eax";
4343 static const char ECXThunkName[] = "__llvm_retpoline_ecx";
4444 static const char EDXThunkName[] = "__llvm_retpoline_edx";
45 static const char PushThunkName[] = "__llvm_retpoline_push";
45 static const char EDIThunkName[] = "__llvm_retpoline_edi";
4646
4747 namespace {
4848 class X86RetpolineThunks : public MachineFunctionPass {
126126 createThunkFunction(M, R11ThunkName);
127127 else
128128 for (StringRef Name :
129 {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
129 {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
130130 createThunkFunction(M, Name);
131131 InsertedThunks = true;
132132 return true;
150150 populateThunk(MF, X86::R11);
151151 } else {
152152 // For 32-bit targets we need to emit a collection of thunks for various
153 // possible scratch registers as well as a fallback that is used when
154 // there are no scratch registers and assumes the retpoline target has
155 // been pushed.
153 // possible scratch registers as well as a fallback that uses EDI, which is
154 // normally callee saved.
156155 // __llvm_retpoline_eax:
157156 // calll .Leax_call_target
158157 // .Leax_capture_spec:
173172 // movl %edx, (%esp)
174173 // retl
175174 //
176 // This last one is a bit more special and so needs a little extra
177 // handling.
178 // __llvm_retpoline_push:
179 // calll .Lpush_call_target
180 // .Lpush_capture_spec:
181 // pause
182 // lfence
183 // jmp .Lpush_capture_spec
184 // .align 16
185 // .Lpush_call_target:
186 // # Clear pause_loop return address.
187 // addl $4, %esp
188 // # Top of stack words are: Callee, RA. Exchange Callee and RA.
189 // pushl 4(%esp) # Push callee
190 // pushl 4(%esp) # Push RA
191 // popl 8(%esp) # Pop RA to final RA
192 // popl (%esp) # Pop callee to next top of stack
193 // retl # Ret to callee
175 // __llvm_retpoline_edi:
176 // ... # Same setup
177 // movl %edi, (%esp)
178 // retl
194179 if (MF.getName() == EAXThunkName)
195180 populateThunk(MF, X86::EAX);
196181 else if (MF.getName() == ECXThunkName)
197182 populateThunk(MF, X86::ECX);
198183 else if (MF.getName() == EDXThunkName)
199184 populateThunk(MF, X86::EDX);
200 else if (MF.getName() == PushThunkName)
201 populateThunk(MF);
185 else if (MF.getName() == EDIThunkName)
186 populateThunk(MF, X86::EDI);
202187 else
203188 llvm_unreachable("Invalid thunk name on x86-32!");
204189 }
300285 CaptureSpec->addSuccessor(CaptureSpec);
301286
302287 CallTarget->setAlignment(4);
303 if (Reg) {
304 insertRegReturnAddrClobber(*CallTarget, *Reg);
305 } else {
306 assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
307 insert32BitPushReturnAddrClobber(*CallTarget);
308 }
288 insertRegReturnAddrClobber(*CallTarget, *Reg);
309289 BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
310290 }
0 ; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
1
2 ; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
3 ; because there are no available scratch registers. The Linux kernel builds
4 ; with -mregparm=3, so we need to support it. TCO should fail because we need
5 ; to restore EDI.
6
7 define void @call_edi(void (i32, i32, i32)* %fp) #0 {
8 entry:
9 tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
10 ret void
11 }
12
13 ; CHECK-LABEL: call_edi:
14 ; EDI is used, so it must be saved.
15 ; CHECK: pushl %edi
16 ; CHECK-DAG: xorl %eax, %eax
17 ; CHECK-DAG: xorl %edx, %edx
18 ; CHECK-DAG: xorl %ecx, %ecx
19 ; CHECK-DAG: movl {{.*}}, %edi
20 ; CHECK: calll __llvm_retpoline_edi
21 ; CHECK: popl %edi
22 ; CHECK: retl
23
24 define void @edi_external(void (i32, i32, i32)* %fp) #1 {
25 entry:
26 tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
27 ret void
28 }
29
30 ; CHECK-LABEL: edi_external:
31 ; CHECK: pushl %edi
32 ; CHECK-DAG: xorl %eax, %eax
33 ; CHECK-DAG: xorl %edx, %edx
34 ; CHECK-DAG: xorl %ecx, %ecx
35 ; CHECK-DAG: movl {{.*}}, %edi
36 ; CHECK: calll __x86_indirect_thunk_edi
37 ; CHECK: popl %edi
38 ; CHECK: retl
39
40 attributes #0 = { "target-features"="+retpoline" }
41 attributes #1 = { "target-features"="+retpoline-external-thunk" }
335335 ; X86-NEXT: movl %edx, (%esp)
336336 ; X86-NEXT: retl
337337 ;
338 ; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
339 ; X86-NEXT: .hidden __llvm_retpoline_push
340 ; X86-NEXT: .weak __llvm_retpoline_push
341 ; X86: __llvm_retpoline_push:
338 ; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
339 ; X86-NEXT: .hidden __llvm_retpoline_edi
340 ; X86-NEXT: .weak __llvm_retpoline_edi
341 ; X86: __llvm_retpoline_edi:
342342 ; X86-NEXT: # {{.*}} # %entry
343343 ; X86-NEXT: calll [[CALL_TARGET:.*]]
344344 ; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
350350 ; X86-NEXT: .p2align 4, 0x90
351351 ; X86-NEXT: [[CALL_TARGET]]: # Block address taken
352352 ; X86-NEXT: # %entry
353 ; X86-NEXT: addl $4, %esp
354 ; X86-NEXT: pushl 4(%esp)
355 ; X86-NEXT: pushl 4(%esp)
356 ; X86-NEXT: popl 8(%esp)
357 ; X86-NEXT: popl (%esp)
353 ; X86-NEXT: movl %edi, (%esp)
358354 ; X86-NEXT: retl
359355
360356