llvm.org GIT mirror llvm / 9209299
[X86] Optimization for replacing LEA with MOV at frame index elimination time Summary: Replace a LEA instruction of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx' MOV is preferable over LEA because usually there are more issue-slots available to execute MOVs than LEAs. Latest processors also support zero-latency MOVs. Fixes pr29022. Reviewers: hfinkel, delena, igorb, myatsina, mkuper Differential Revision: https://reviews.llvm.org/D24705 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282385 91177308-0d34-0410-b5e6-96231b3b80d8 Zvi Rackover 3 years ago
18 changed file(s) with 83 addition(s) and 38 deletion(s). Raw diff Collapse all Expand all
594594 llvm_unreachable("Unused function on X86. Otherwise need a test case.");
595595 }
596596
597 // tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
598 // of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
599 // TODO: In this case we should be really trying first to entirely eliminate
600 // this instruction which is a plain copy.
601 static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
602 MachineInstr &MI = *II;
603 unsigned Opc = II->getOpcode();
604 // Check if this is a LEA of the form 'lea (%esp), %ebx'
605 if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
606 MI.getOperand(2).getImm() != 1 ||
607 MI.getOperand(3).getReg() != X86::NoRegister ||
608 MI.getOperand(4).getImm() != 0 ||
609 MI.getOperand(5).getReg() != X86::NoRegister)
610 return false;
611 unsigned BasePtr = MI.getOperand(1).getReg();
612 // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
613 // be replaced with a 32-bit operand MOV which will zero extend the upper
614 // 32-bits of the super register.
615 if (Opc == X86::LEA64_32r)
616 BasePtr = getX86SubSuperRegister(BasePtr, 32);
617 unsigned NewDestReg = MI.getOperand(0).getReg();
618 const X86InstrInfo *TII =
619 MI.getParent()->getParent()->getSubtarget().getInstrInfo();
620 TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
621 MI.getOperand(1).isKill());
622 MI.eraseFromParent();
623 return true;
624 }
625
597626 void
598627 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
599628 int SPAdj, unsigned FIOperandNum,
668697 int Offset = FIOffset + Imm;
669698 assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
670699 "Requesting 64-bit offset in 32-bit immediate!");
671 MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
700 if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
701 MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
672702 } else {
673703 // Offset is symbolic. This is extremely rare.
674704 uint64_t Offset = FIOffset +
2424 ; X64-LABEL: testf16_inp
2525 ; X64: vaddps {{.*}}, {{%ymm[0-1]}}
2626 ; X64: vaddps {{.*}}, {{%ymm[0-1]}}
27 ; X64: leaq {{.*}}(%rsp), %rdi
27 ; X64: movq %rsp, %rdi
2828 ; X64: call
2929 ; X64: ret
3030
2121
2222 ; X64-LABEL: testf16_inp
2323 ; X64: vaddps {{.*}}, {{%zmm[0-1]}}
24 ; X64: leaq {{.*}}(%rsp), %rdi
24 ; X64: movq %rsp, %rdi
2525 ; X64: call
2626 ; X64: ret
2727
55 ;
66 ; CHECK: callq _Z3fooPcjPKc
77 ; CHECK: callq _Z3fooPcjPKc
8 ; CHECK: leaq (%rsp), %rdi
8 ; CHECK: movq %rsp, %rdi
99 ; CHECK: movl $4, %esi
1010 ; CHECK: testl {{%[a-z]+}}, {{%[a-z]+}}
1111 ; CHECK: je .LBB0_4
3737 ; CHECK: subq ${{[0-9]+}}, %rsp
3838 ;
3939 ; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
40 ; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
40 ; CHECK: movq %rsp, %rsi
4141 ; CHECK: callq _t2_helper
4242 ;
4343 ; CHECK: movq %rbp, %rsp
8888 ; CHECK: movq %rsp, %rbx
8989 ;
9090 ; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
91 ; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
91 ; CHECK: movq %rbx, %rdx
9292 ; CHECK: callq _t4_helper
9393 ;
9494 ; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
413413 ; SSE-NEXT: subq $64, %rsp
414414 ; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
415415 ; SSE-NEXT: movaps %xmm0, (%rsp)
416 ; SSE-NEXT: leaq (%rsp), %rax
416 ; SSE-NEXT: movq %rsp, %rax
417417 ; SSE-NEXT: movb (%rdi,%rax), %al
418418 ; SSE-NEXT: movq %rbp, %rsp
419419 ; SSE-NEXT: popq %rbp
426426 ; AVX-NEXT: andq $-32, %rsp
427427 ; AVX-NEXT: subq $64, %rsp
428428 ; AVX-NEXT: vmovaps %ymm0, (%rsp)
429 ; AVX-NEXT: leaq (%rsp), %rax
429 ; AVX-NEXT: movq %rsp, %rax
430430 ; AVX-NEXT: movb (%rdi,%rax), %al
431431 ; AVX-NEXT: movq %rbp, %rsp
432432 ; AVX-NEXT: popq %rbp
253253 call void @test20sret(%struct.a* sret %tmp)
254254 ret void
255255 ; CHECK-LABEL: test20:
256 ; CHECK: leaq (%rsp), %rdi
256 ; CHECK: movq %rsp, %rdi
257257 ; CHECK: callq _test20sret
258258 }
259259 declare void @test20sret(%struct.a* sret)
8282 ret void
8383 ; CHECK-LABEL: test4:
8484 ; CHECK: subl $28
85 ; CHECK: leal (%esp), %ecx
85 ; CHECK: movl %esp, %ecx
8686 ; CHECK: calll _test4fastccsret
8787 ; CHECK: addl $28
8888 }
1818 ; CHECK-W64-LABEL: test1
1919 ; CHECK-W64: push
2020 ; CHECK-W64-NEXT: movq %rsp, %rbp
21 ; CHECK-W64-NEXT: leaq (%rbp), %rax
21 ; CHECK-W64-NEXT: movq %rbp, %rax
2222 ; CHECK-W64-NEXT: pop
2323 ; CHECK-W64-NEXT: ret
2424 ; CHECK-64-LABEL: test1
5353 ; CHECK-W64-LABEL: test2
5454 ; CHECK-W64: push
5555 ; CHECK-W64-NEXT: movq %rsp, %rbp
56 ; CHECK-W64-NEXT: leaq (%rbp), %rax
56 ; CHECK-W64-NEXT: movq %rbp, %rax
5757 ; CHECK-W64-NEXT: pop
5858 ; CHECK-W64-NEXT: ret
5959 ; CHECK-64-LABEL: test2
2020 %call1 = tail call <4 x float> @_mm_castsi128_ps(<2 x i64> %tmp0)
2121 ret void
2222 ; CHECK-LABEL: test1:
23 ; CHECK: leal{{.*}}
23 ; CHECK: movl %esp,
2424 ; CHECK: calll _memcpy
2525 ; CHECK: movaps __xmm@{{[0-9a-f]+}}, %xmm1
2626 ; CHECK: calll __mm_xor_si128
6666 ; X64: callq check_a
6767 ; X64: callq bar1
6868 ; X64: callq bar1
69 ; X64: leaq (%rsp), %rdi
69 ; X64: movq %rsp, %rdi
7070 ; X64: callq check_f
7171 ; X64: callq bar1
7272 ; X64: callq bar3
0 ; RUN: llc < %s -mcpu=skx -mtriple x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s
1 ; RUN: llc < %s -mcpu=skx -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s --check-prefix=X32
2
; Passes the address of a stack slot to @foo. The CHECK/X32 lines below expect
; that address to be materialized with a register-to-register mov from the
; stack pointer (movq for the x86_64 RUN line, movl for the gnux32 RUN line)
; immediately before the call.
3 define i32 @A() {
4 ; CHECK: movq %rsp, %rdi
5 ; CHECK-NEXT: call
6
7 ; X32: movl %esp, %edi
8 ; X32-NEXT: call
9 %alloc = alloca i32, align 8
10 %call = call i32 @foo(i32* %alloc)
11 ret i32 %call
12 }
13
14 declare i32 @foo(i32*)
2626 ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
2727 ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
2828 ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
29 ; NOT_WIN: leaq {{.*}}(%rsp), %rdi
29 ; NOT_WIN: movq %rsp, %rdi
3030 ; NOT_WIN: call
3131 ; NOT_WIN: ret
3232
812812 ; X32-LABEL: test_MM_GET_EXCEPTION_MASK:
813813 ; X32: # BB#0:
814814 ; X32-NEXT: pushl %eax
815 ; X32-NEXT: leal (%esp), %eax
815 ; X32-NEXT: movl %esp, %eax
816816 ; X32-NEXT: stmxcsr (%eax)
817817 ; X32-NEXT: movl (%esp), %eax
818818 ; X32-NEXT: andl $8064, %eax # imm = 0x1F80
839839 ; X32-LABEL: test_MM_GET_EXCEPTION_STATE:
840840 ; X32: # BB#0:
841841 ; X32-NEXT: pushl %eax
842 ; X32-NEXT: leal (%esp), %eax
842 ; X32-NEXT: movl %esp, %eax
843843 ; X32-NEXT: stmxcsr (%eax)
844844 ; X32-NEXT: movl (%esp), %eax
845845 ; X32-NEXT: andl $63, %eax
865865 ; X32-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
866866 ; X32: # BB#0:
867867 ; X32-NEXT: pushl %eax
868 ; X32-NEXT: leal (%esp), %eax
868 ; X32-NEXT: movl %esp, %eax
869869 ; X32-NEXT: stmxcsr (%eax)
870870 ; X32-NEXT: movl (%esp), %eax
871871 ; X32-NEXT: andl $32768, %eax # imm = 0x8000
891891 ; X32-LABEL: test_MM_GET_ROUNDING_MODE:
892892 ; X32: # BB#0:
893893 ; X32-NEXT: pushl %eax
894 ; X32-NEXT: leal (%esp), %eax
894 ; X32-NEXT: movl %esp, %eax
895895 ; X32-NEXT: stmxcsr (%eax)
896896 ; X32-NEXT: movl (%esp), %eax
897897 ; X32-NEXT: andl $24576, %eax # imm = 0x6000
917917 ; X32-LABEL: test_mm_getcsr:
918918 ; X32: # BB#0:
919919 ; X32-NEXT: pushl %eax
920 ; X32-NEXT: leal (%esp), %eax
920 ; X32-NEXT: movl %esp, %eax
921921 ; X32-NEXT: stmxcsr (%eax)
922922 ; X32-NEXT: movl (%esp), %eax
923923 ; X32-NEXT: popl %ecx
14261426 ; X32: # BB#0:
14271427 ; X32-NEXT: pushl %eax
14281428 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1429 ; X32-NEXT: leal (%esp), %ecx
1429 ; X32-NEXT: movl %esp, %ecx
14301430 ; X32-NEXT: stmxcsr (%ecx)
14311431 ; X32-NEXT: movl (%esp), %edx
14321432 ; X32-NEXT: andl $-8065, %edx # imm = 0xE07F
14631463 ; X32: # BB#0:
14641464 ; X32-NEXT: pushl %eax
14651465 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1466 ; X32-NEXT: leal (%esp), %ecx
1466 ; X32-NEXT: movl %esp, %ecx
14671467 ; X32-NEXT: stmxcsr (%ecx)
14681468 ; X32-NEXT: movl (%esp), %edx
14691469 ; X32-NEXT: andl $-64, %edx
14991499 ; X32: # BB#0:
15001500 ; X32-NEXT: pushl %eax
15011501 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1502 ; X32-NEXT: leal (%esp), %ecx
1502 ; X32-NEXT: movl %esp, %ecx
15031503 ; X32-NEXT: stmxcsr (%ecx)
15041504 ; X32-NEXT: movl (%esp), %edx
15051505 ; X32-NEXT: andl $-32769, %edx # imm = 0xFFFF7FFF
15791579 ; X32: # BB#0:
15801580 ; X32-NEXT: pushl %eax
15811581 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1582 ; X32-NEXT: leal (%esp), %ecx
1582 ; X32-NEXT: movl %esp, %ecx
15831583 ; X32-NEXT: stmxcsr (%ecx)
15841584 ; X32-NEXT: movl (%esp), %edx
15851585 ; X32-NEXT: andl $-24577, %edx # imm = 0x9FFF
16541654 ; X32: # BB#0:
16551655 ; X32-NEXT: pushl %eax
16561656 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1657 ; X32-NEXT: leal (%esp), %ecx
1657 ; X32-NEXT: movl %esp, %ecx
16581658 ; X32-NEXT: movl %eax, (%esp)
16591659 ; X32-NEXT: ldmxcsr (%ecx)
16601660 ; X32-NEXT: popl %eax
3737 ; in memory. The caller provides space for the return value and passes
3838 ; the address in %rax. The first input argument will be in %rdi.
3939 ; CHECK-LABEL: test2:
40 ; CHECK: leaq (%rsp), %rax
40 ; CHECK: movq %rsp, %rax
4141 ; CHECK: callq gen2
4242 ; CHECK: movl (%rsp)
4343 ; CHECK-DAG: addl 4(%rsp)
4545 ; CHECK-DAG: addl 12(%rsp)
4646 ; CHECK-DAG: addl 16(%rsp)
4747 ; CHECK-O0-LABEL: test2:
48 ; CHECK-O0-DAG: leaq (%rsp), %rax
48 ; CHECK-O0-DAG: movq %rsp, %rax
4949 ; CHECK-O0: callq gen2
5050 ; CHECK-O0-DAG: movl (%rsp)
5151 ; CHECK-O0-DAG: movl 4(%rsp)
137137 ; The this pointer goes to ECX.
138138 ; (through %ecx in the -O0 build).
139139 ; WIN32: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x
140 ; WIN32: leal {{[0-9]*}}(%esp), %ecx
140 ; WIN32: {{leal [1-9]+\(%esp\)|movl %esp}}, %ecx
141141 ; WIN32: {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}}
142142 ; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
143143 ; WIN32: retl
157157
158158
159159 ; The sret pointer is (%esp)
160 ; WIN32: leal {{4?}}(%esp), %eax
160 ; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax
161161 ; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
162162
163163 ; The sret pointer is %ecx
164164 ; The %x argument is moved to (%esp). It will be the this pointer.
165 ; MINGW_X86: leal {{4?}}(%esp), %ecx
165 ; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %ecx
166166 ; MINGW_X86-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
167167 ; MINGW_X86-NEXT: calll _test6_g
168168
169 ; CYGWIN: leal {{4?}}(%esp), %ecx
169 ; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %ecx
170170 ; CYGWIN-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
171171 ; CYGWIN-NEXT: calll _test6_g
172172
190190 ; CYGWIN: movl {{16|20}}(%esp), %ecx
191191
192192 ; The sret pointer is (%esp)
193 ; WIN32: leal {{4?}}(%esp), %eax
193 ; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax
194194 ; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
195 ; MINGW_X86: leal {{4?}}(%esp), %eax
195 ; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %eax
196196 ; MINGW_X86-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
197 ; CYGWIN: leal {{4?}}(%esp), %eax
197 ; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %eax
198198 ; CYGWIN-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
199199
200200 %tmp = alloca %struct.test7, align 4
109109 %gep = getelementptr [300 x i8], [300 x i8]* %alloca, i32 0, i32 0
110110 call void @external(i8* %gep)
111111 ; CHECK: subq $32, %rsp
112 ; CHECK: leaq (%rbx), %rcx
112 ; CHECK: movq %rbx, %rcx
113113 ; CHECK: callq external
114114 ; CHECK: addq $32, %rsp
115115
169169 ; OBJ: PtrParent: 0x0
170170 ; OBJ: PtrEnd: 0x0
171171 ; OBJ: PtrNext: 0x0
172 ; OBJ: CodeSize: 0x3D
172 ; OBJ: CodeSize: 0x3C
173173 ; OBJ: DbgStart: 0x0
174174 ; OBJ: DbgEnd: 0x0
175175 ; OBJ: FunctionType: baz (0x1004)
188188 ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x8, LineOffset: 1}
189189 ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x7, LineOffset: 1}
190190 ; OBJ-NEXT: ChangeLineOffset: 1
191 ; OBJ-NEXT: ChangeCodeOffset: 0x1E
191 ; OBJ-NEXT: ChangeCodeOffset: 0x1D
192192 ; OBJ-NEXT: ChangeCodeLength: 0x7
193193 ; OBJ: ]
194194 ; OBJ: }
198198 ; OBJ: Inlinee: foo (0x1003)
199199 ; OBJ: BinaryAnnotations [
200200 ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0xF, LineOffset: 1}
201 ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0xA, LineOffset: 1}
201 ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x9, LineOffset: 1}
202202 ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x6, LineOffset: 1}
203203 ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x7, LineOffset: 1}
204204 ; OBJ-NEXT: ChangeCodeLength: 0x7