llvm.org GIT mirror llvm / 1469e29
X86: Fix conflict over ESI between base register and rep;movsl. — The new solution is to not use this lowering if there are any dynamic allocas in the current function. We know up front if there are dynamic allocas, but we don't know if we'll need to create stack temporaries with large alignment during lowering. Conservatively assume that we will need such temporaries. — Reviewed By: hans. Differential Revision: http://reviews.llvm.org/D5128. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216775 91177308-0d34-0410-b5e6-96231b3b80d8 — Reid Kleckner, 6 years ago
3 changed file(s) with 136 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
2727 : TargetSelectionDAGInfo(&DL) {}
2828
2929 X86SelectionDAGInfo::~X86SelectionDAGInfo() {}
30
31 bool X86SelectionDAGInfo::isBaseRegConflictPossible(
32 SelectionDAG &DAG, ArrayRef ClobberSet) const {
33 // We cannot use TRI->hasBasePointer() until *after* we select all basic
34 // blocks. Legalization may introduce new stack temporaries with large
35 // alignment requirements. Fall back to generic code if there are any
36 // dynamic stack adjustments (hopefully rare) and the base pointer would
37 // conflict if we had to use it.
38 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
39 if (!MFI->hasVarSizedObjects() && !MFI->hasInlineAsmWithSPAdjust())
40 return false;
41
42 const X86RegisterInfo *TRI = static_cast(
43 DAG.getSubtarget().getRegisterInfo());
44 unsigned BaseReg = TRI->getBaseRegister();
45 for (unsigned R : ClobberSet)
46 if (BaseReg == R)
47 return true;
48 return false;
49 }
3050
3151 SDValue
3252 X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
3757 MachinePointerInfo DstPtrInfo) const {
3858 ConstantSDNode *ConstantSize = dyn_cast(Size);
3959 const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget();
60
61 #ifndef NDEBUG
62 // The base register must never conflict with the registers clobbered here; assert that in debug builds instead of bailing out.
63 unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
64 X86::ECX, X86::EAX, X86::EDI};
65 assert(!isBaseRegConflictPossible(DAG, ClobberSet));
66 #endif
4067
4168 // If to a segment-relative address space, use the default lowering.
4269 if (DstPtrInfo.getAddrSpace() >= 256)
200227 SrcPtrInfo.getAddrSpace() >= 256)
201228 return SDValue();
202229
203 // ESI might be used as a base pointer, in that case we can't simply overwrite
204 // the register. Fall back to generic code.
205 const X86RegisterInfo *TRI = static_cast(
206 DAG.getSubtarget().getRegisterInfo());
207 if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
208 TRI->getBaseRegister() == X86::ESI)
230 // If the base register might conflict with our physical registers, bail out.
231 unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
232 X86::ECX, X86::ESI, X86::EDI};
233 if (isBaseRegConflictPossible(DAG, ClobberSet))
209234 return SDValue();
210235
211236 MVT AVT;
2222 class X86Subtarget;
2323
2424 class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
25 /// Returns true if it is possible for the base register to conflict with the
26 /// given set of clobbers for a memory intrinsic.
27 bool isBaseRegConflictPossible(SelectionDAG &DAG,
28 ArrayRef ClobberSet) const;
29
2530 public:
2631 explicit X86SelectionDAGInfo(const DataLayout &DL);
2732 ~X86SelectionDAGInfo();
0 ; RUN: llc -mtriple=i686-windows -mattr=+sse2 < %s | FileCheck %s
1
2 target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
3 target triple = "i686-pc-windows-msvc"
4
5 ; There is a conflict between lowering the X86 memory intrinsics and the "base"
6 ; register used to address stack locals. See X86RegisterInfo::hasBasePointer
7 ; for when this is necessary. Typically, we choose ESI for the base register,
8 ; which all of the X86 string instructions use.
9
10 ; The pattern of vector icmp and extractelement is used in these tests because
11 ; it forces creation of an aligned stack temporary. Perhaps such temporaries
12 ; shouldn't be aligned.
13
14 declare void @escape_vla_and_icmp(i8*, i1 zeroext)
15 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
16 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
17
18 define i32 @memcpy_novla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
19 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
20 br i1 %cond, label %spill_vectors, label %no_vectors
21
22 no_vectors:
23 ret i32 0
24
25 spill_vectors:
26 %vp1 = getelementptr <4 x i32>* %vp0, i32 1
27 %v0 = load <4 x i32>* %vp0
28 %v1 = load <4 x i32>* %vp1
29 %vicmp = icmp slt <4 x i32> %v0, %v1
30 %icmp = extractelement <4 x i1> %vicmp, i32 0
31 call void @escape_vla_and_icmp(i8* null, i1 zeroext %icmp)
32 %r = extractelement <4 x i32> %v0, i32 0
33 ret i32 %r
34 }
35
36 ; CHECK-LABEL: _memcpy_novla_vector:
37 ; CHECK: andl $-16, %esp
38 ; CHECK-DAG: movl $32, %ecx
39 ; CHECK-DAG: movl {{.*}}, %esi
40 ; CHECK-DAG: movl {{.*}}, %edi
41 ; CHECK: rep;movsl
42
43 define i32 @memcpy_vla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
44 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
45 br i1 %cond, label %spill_vectors, label %no_vectors
46
47 no_vectors:
48 ret i32 0
49
50 spill_vectors:
51 %vp1 = getelementptr <4 x i32>* %vp0, i32 1
52 %v0 = load <4 x i32>* %vp0
53 %v1 = load <4 x i32>* %vp1
54 %vicmp = icmp slt <4 x i32> %v0, %v1
55 %icmp = extractelement <4 x i1> %vicmp, i32 0
56 %vla = alloca i8, i32 %n
57 call void @escape_vla_and_icmp(i8* %vla, i1 zeroext %icmp)
58 %r = extractelement <4 x i32> %v0, i32 0
59 ret i32 %r
60 }
61
62 ; CHECK-LABEL: _memcpy_vla_vector:
63 ; CHECK: andl $-16, %esp
64 ; CHECK: movl %esp, %esi
65 ; CHECK: movl $128, {{.*}}(%esp)
66 ; CHECK: calll _memcpy
67 ; CHECK: calll __chkstk
68
69 ; stosd doesn't clobber esi, so the rep;stosl memset lowering can still be used even with a VLA.
70
71 define i32 @memset_vla_vector(<4 x i32>* %vp0, i8* %a, i32 %n, i1 zeroext %cond) {
72 call void @llvm.memset.p0i8.i32(i8* %a, i8 42, i32 128, i32 4, i1 false)
73 br i1 %cond, label %spill_vectors, label %no_vectors
74
75 no_vectors:
76 ret i32 0
77
78 spill_vectors:
79 %vp1 = getelementptr <4 x i32>* %vp0, i32 1
80 %v0 = load <4 x i32>* %vp0
81 %v1 = load <4 x i32>* %vp1
82 %vicmp = icmp slt <4 x i32> %v0, %v1
83 %icmp = extractelement <4 x i1> %vicmp, i32 0
84 %vla = alloca i8, i32 %n
85 call void @escape_vla_and_icmp(i8* %vla, i1 zeroext %icmp)
86 %r = extractelement <4 x i32> %v0, i32 0
87 ret i32 %r
88 }
89
90 ; CHECK-LABEL: _memset_vla_vector:
91 ; CHECK: andl $-16, %esp
92 ; CHECK: movl %esp, %esi
93 ; CHECK-DAG: movl $707406378, %eax # imm = 0x2A2A2A2A
94 ; CHECK-DAG: movl $32, %ecx
95 ; CHECK-DAG: movl {{.*}}, %edi
96 ; CHECK-NOT: movl {{.*}}, %esi
97 ; CHECK: rep;stosl
98
99 ; Add a test for memcmp if we ever add a special lowering for it.