llvm.org GIT mirror llvm / 1887c1c
Fix a number of byval / memcpy / memset related codegen issues. 1. x86-64 byval alignment should be max of 8 and alignment of type. Previously the code was not doing what the commit message was saying. 2. Do not use byte repeat move and store operations. These are slow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55139 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 11 years ago
7 changed file(s) with 103 addition(s) and 58 deletion(s). Raw diff Collapse all Expand all
795795 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
796796 /// are at 4-byte boundaries.
797797 unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
798 if (Subtarget->is64Bit())
799 return getTargetData()->getABITypeAlignment(Ty);
798 if (Subtarget->is64Bit()) {
799 // Max of 8 and alignment of type.
800 unsigned TyAlign = getTargetData()->getABITypeAlignment(Ty);
801 if (TyAlign > 8)
802 return TyAlign;
803 return 8;
804 }
805
800806 unsigned Align = 4;
801807 if (Subtarget->hasSSE1())
802808 getMaxByValAlign(Ty, Align);
50135019
50145020 SDValue
50155021 X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
5016 SDValue Chain,
5017 SDValue Dst, SDValue Src,
5018 SDValue Size, unsigned Align,
5022 SDValue Chain,
5023 SDValue Dst, SDValue Src,
5024 SDValue Size, unsigned Align,
50195025 const Value *DstSV, uint64_t DstSVOff) {
50205026 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
50215027
50225028 /// If not DWORD aligned or size is more than the threshold, call the library.
50235029 /// The libc version is likely to be faster for these cases. It can use the
50245030 /// address value and run time information about the CPU.
5025 if ((Align & 3) == 0 ||
5031 if ((Align & 3) != 0 ||
50265032 !ConstantSize ||
50275033 ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
50285034 SDValue InFlag(0, 0);
50645070
50655071 // If the value is a constant, then we can potentially use larger sets.
50665072 switch (Align & 3) {
5067 case 2: // WORD aligned
5068 AVT = MVT::i16;
5069 ValReg = X86::AX;
5070 Val = (Val << 8) | Val;
5071 break;
5072 case 0: // DWORD aligned
5073 AVT = MVT::i32;
5074 ValReg = X86::EAX;
5075 Val = (Val << 8) | Val;
5076 Val = (Val << 16) | Val;
5077 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
5078 AVT = MVT::i64;
5079 ValReg = X86::RAX;
5080 Val = (Val << 32) | Val;
5081 }
5082 break;
5083 default: // Byte aligned
5084 AVT = MVT::i8;
5085 ValReg = X86::AL;
5086 Count = DAG.getIntPtrConstant(SizeVal);
5087 break;
5073 case 2: // WORD aligned
5074 AVT = MVT::i16;
5075 ValReg = X86::AX;
5076 Val = (Val << 8) | Val;
5077 break;
5078 case 0: // DWORD aligned
5079 AVT = MVT::i32;
5080 ValReg = X86::EAX;
5081 Val = (Val << 8) | Val;
5082 Val = (Val << 16) | Val;
5083 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
5084 AVT = MVT::i64;
5085 ValReg = X86::RAX;
5086 Val = (Val << 32) | Val;
5087 }
5088 break;
5089 default: // Byte aligned
5090 AVT = MVT::i8;
5091 ValReg = X86::AL;
5092 Count = DAG.getIntPtrConstant(SizeVal);
5093 break;
50885094 }
50895095
50905096 if (AVT.bitsGT(MVT::i8)) {
51525158
51535159 SDValue
51545160 X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
5155 SDValue Chain,
5156 SDValue Dst, SDValue Src,
5157 SDValue Size, unsigned Align,
5158 bool AlwaysInline,
5159 const Value *DstSV, uint64_t DstSVOff,
5160 const Value *SrcSV, uint64_t SrcSVOff){
5161
5161 SDValue Chain, SDValue Dst, SDValue Src,
5162 SDValue Size, unsigned Align,
5163 bool AlwaysInline,
5164 const Value *DstSV, uint64_t DstSVOff,
5165 const Value *SrcSV, uint64_t SrcSVOff) {
51625166 // This requires the copy size to be a constant, preferably
51635167 // within a subtarget-specific limit.
51645168 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
51685172 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
51695173 return SDValue();
51705174
5171 MVT AVT;
5172 unsigned BytesLeft = 0;
5173 if (Align >= 8 && Subtarget->is64Bit())
5175 /// If not DWORD aligned, call the library.
5176 if ((Align & 3) != 0)
5177 return SDValue();
5178
5179 // DWORD aligned
5180 MVT AVT = MVT::i32;
5181 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
51745182 AVT = MVT::i64;
5175 else if (Align >= 4)
5176 AVT = MVT::i32;
5177 else if (Align >= 2)
5178 AVT = MVT::i16;
5179 else
5180 AVT = MVT::i8;
51815183
51825184 unsigned UBytes = AVT.getSizeInBits() / 8;
51835185 unsigned CountVal = SizeVal / UBytes;
51845186 SDValue Count = DAG.getIntPtrConstant(CountVal);
5185 BytesLeft = SizeVal % UBytes;
5187 unsigned BytesLeft = SizeVal % UBytes;
51865188
51875189 SDValue InFlag(0, 0);
51885190 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
None ; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 3
0 ; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
11
22 @A = global [32 x i32] zeroinitializer
33 @B = global [32 x i32] zeroinitializer
44
55 declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
66
7 define void @main() {
7 define void @main() nounwind {
88 ; dword copy
99 call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
1010 i8* bitcast ([32 x i32]* @B to i8*),
None ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
0 ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
11 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
22
33 %struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
66 i32, i32, i32, i32, i32, i32, i32, i32,
77 i32 }
88
9 define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) {
9 define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) nounwind {
1010 entry:
1111 %d = alloca %struct.s, align 16
1212 %tmp = getelementptr %struct.s* %d, i32 0, i32 0
None ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2
0 ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
11 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
22
33 %struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
1212
1313
1414 define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3,
15 i16 signext %a4, i16 signext %a5, i16 signext %a6) {
15 i16 signext %a4, i16 signext %a5, i16 signext %a6) nounwind {
1616 entry:
1717 %a = alloca %struct.s, align 16
1818 %tmp = getelementptr %struct.s* %a, i32 0, i32 0
None ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2
0 ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
11 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
22
33 %struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
0 ; RUN: llvm-as < %s | llc -march=x86 | not grep rep
1 ; RUN: llvm-as < %s | llc -march=x86 | grep memset
2
3 declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
4
5 define fastcc i32 @cli_scanzip(i32 %desc) nounwind {
6 entry:
7 br label %bb8.i.i.i.i
8
9 bb8.i.i.i.i: ; preds = %bb8.i.i.i.i, %entry
10 icmp eq i32 0, 0 ; <i1>:0 [#uses=1]
11 br i1 %0, label %bb61.i.i.i, label %bb8.i.i.i.i
12
13 bb32.i.i.i: ; preds = %bb61.i.i.i
14 ptrtoint i8* %tail.0.i.i.i to i32 ; <i32>:1 [#uses=1]
15 sub i32 0, %1 ; <i32>:2 [#uses=1]
16 icmp sgt i32 %2, 19 ; <i1>:3 [#uses=1]
17 br i1 %3, label %bb34.i.i.i, label %bb61.i.i.i
18
19 bb34.i.i.i: ; preds = %bb32.i.i.i
20 load i32* null, align 4 ; <i32>:4 [#uses=1]
21 icmp eq i32 %4, 101010256 ; <i1>:5 [#uses=1]
22 br i1 %5, label %bb8.i11.i.i.i, label %bb61.i.i.i
23
24 bb8.i11.i.i.i: ; preds = %bb8.i11.i.i.i, %bb34.i.i.i
25 icmp eq i32 0, 0 ; <i1>:6 [#uses=1]
26 br i1 %6, label %cli_dbgmsg.exit49.i, label %bb8.i11.i.i.i
27
28 cli_dbgmsg.exit49.i: ; preds = %bb8.i11.i.i.i
29 icmp eq [32768 x i8]* null, null ; <i1>:7 [#uses=1]
30 br i1 %7, label %bb1.i28.i, label %bb8.i.i
31
32 bb61.i.i.i: ; preds = %bb61.i.i.i, %bb34.i.i.i, %bb32.i.i.i, %bb8.i.i.i.i
33 %tail.0.i.i.i = getelementptr [1024 x i8]* null, i32 0, i32 0 ; <i8*> [#uses=2]
34 load i8* %tail.0.i.i.i, align 1 ; <i8>:8 [#uses=1]
35 icmp eq i8 %8, 80 ; <i1>:9 [#uses=1]
36 br i1 %9, label %bb32.i.i.i, label %bb61.i.i.i
37
38 bb1.i28.i: ; preds = %cli_dbgmsg.exit49.i
39 call void @llvm.memset.i32( i8* null, i8 0, i32 88, i32 1 ) nounwind
40 unreachable
41
42 bb8.i.i: ; preds = %bb8.i.i, %cli_dbgmsg.exit49.i
43 br label %bb8.i.i
44 }
None ; RUN: llvm-as < %s | llc -march=x86 | grep stosb
0 ; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep stosl
1 ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq | count 10
12
2 target triple = "i386-apple-darwin9"
3 %struct.S = type { [80 x i8] }
4
5 define %struct.S* @bork() {
3 define void @bork() nounwind {
64 entry:
7 call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 1 )
8 ret %struct.S* null
5 call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 4 )
6 ret void
97 }
108
119 declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind