llvm.org GIT mirror llvm / 83d3482
Merge from mainline. Fix a nasty miscompilation of 176.gcc on linux/x86 where we synthesized a memset using 16-byte XMM stores, but where the stack realignment code didn't work. Until it does (PR2962), disable use of xmm regs in memcpy and memset formation for linux and other targets with insufficiently aligned stacks. This is part of PR2888. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_24@58423 91177308-0d34-0410-b5e6-96231b3b80d8 Tanya Lattner 10 years ago
2 changed file(s) with 31 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
836836 MVT
837837 X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
838838 bool isSrcConst, bool isSrcStr) const {
839 if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
840 return MVT::v4i32;
841 if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
842 return MVT::v4f32;
839 // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
840 // linux. This is because the stack realignment code can't handle certain
841 // cases like PR2962. This should be removed when PR2962 is fixed.
842 if (Subtarget->getStackAlignment() >= 16) {
843 if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
844 return MVT::v4i32;
845 if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
846 return MVT::v4f32;
847 }
843848 if (Subtarget->is64Bit() && Size >= 8)
844849 return MVT::i64;
845850 return MVT::i32;
0 ; Linux doesn't support stack realignment for functions with allocas (PR2888).
1 ; Until it does, we shouldn't use movaps to access the stack. On targets with a
2 ; sufficiently aligned stack (e.g. darwin) we should.
3
4 ; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
5 ; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
6
7
8 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
9 target triple = "i386-pc-linux-gnu"
10
11 define void @foo(i32 %t) nounwind {
12 %tmp1210 = alloca i8, i32 32, align 4
13 call void @llvm.memset.i64(i8* %tmp1210, i8 0, i64 32, i32 4)
14
15 %x = alloca i8, i32 %t
16 call void @dummy(i8* %x)
17 ret void
18 }
19
20 declare void @dummy(i8* %x)
21 declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind