llvm.org GIT mirror llvm / e38f45e
[ARM] Use AEABI aligned function variants

AEABI defines aligned variants of memcpy etc. that can be faster than the default version due to not having to do alignment checks. When emitting target code for these functions, make use of these aligned variants if possible. Also convert memset to memclr if possible.

Differential Revision: http://reviews.llvm.org/D8060
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237127 91177308-0d34-0410-b5e6-96231b3b80d8
John Brawn, 5 years ago
3 changed file(s) with 237 addition(s) and 96 deletion(s). Raw diff Collapse all Expand all
2121 : TargetSelectionDAGInfo(&DL) {}
2222
2323 ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
24 }
25
26 // Emit, if possible, a specialized version of the given Libcall. Typically this
27 // means selecting the appropriately aligned version, but we also convert memset
28 // of 0 into memclr.
29 SDValue ARMSelectionDAGInfo::
30 EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
31 SDValue Chain,
32 SDValue Dst, SDValue Src,
33 SDValue Size, unsigned Align,
34 RTLIB::Libcall LC) const {
35 const ARMSubtarget &Subtarget =
36 DAG.getMachineFunction().getSubtarget();
37 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
38
39 // Only use a specialized AEABI function if the default version of this
40 // Libcall is an AEABI function.
41 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
42 return SDValue();
43
44 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
45 // able to translate memset to memclr and use the value to index the function
46 // name array.
47 enum {
48 AEABI_MEMCPY = 0,
49 AEABI_MEMMOVE,
50 AEABI_MEMSET,
51 AEABI_MEMCLR
52 } AEABILibcall;
53 switch (LC) {
54 case RTLIB::MEMCPY:
55 AEABILibcall = AEABI_MEMCPY;
56 break;
57 case RTLIB::MEMMOVE:
58 AEABILibcall = AEABI_MEMMOVE;
59 break;
60 case RTLIB::MEMSET:
61 AEABILibcall = AEABI_MEMSET;
62 if (ConstantSDNode *ConstantSrc = dyn_cast(Src))
63 if (ConstantSrc->getZExtValue() == 0)
64 AEABILibcall = AEABI_MEMCLR;
65 break;
66 default:
67 return SDValue();
68 }
69
70 // Choose the most-aligned libcall variant that we can
71 enum {
72 ALIGN1 = 0,
73 ALIGN4,
74 ALIGN8
75 } AlignVariant;
76 if ((Align & 7) == 0)
77 AlignVariant = ALIGN8;
78 else if ((Align & 3) == 0)
79 AlignVariant = ALIGN4;
80 else
81 AlignVariant = ALIGN1;
82
83 TargetLowering::ArgListTy Args;
84 TargetLowering::ArgListEntry Entry;
85 Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext());
86 Entry.Node = Dst;
87 Args.push_back(Entry);
88 if (AEABILibcall == AEABI_MEMCLR) {
89 Entry.Node = Size;
90 Args.push_back(Entry);
91 } else if (AEABILibcall == AEABI_MEMSET) {
92 // Adjust parameters for memset, EABI uses format (ptr, size, value),
93 // GNU library uses (ptr, value, size)
94 // See RTABI section 4.3.4
95 Entry.Node = Size;
96 Args.push_back(Entry);
97
98 // Extend or truncate the argument to be an i32 value for the call.
99 if (Src.getValueType().bitsGT(MVT::i32))
100 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
101 else if (Src.getValueType().bitsLT(MVT::i32))
102 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
103
104 Entry.Node = Src;
105 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
106 Entry.isSExt = false;
107 Args.push_back(Entry);
108 } else {
109 Entry.Node = Src;
110 Args.push_back(Entry);
111
112 Entry.Node = Size;
113 Args.push_back(Entry);
114 }
115
116 char const *FunctionNames[4][3] = {
117 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
118 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
119 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
120 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
121 };
122 TargetLowering::CallLoweringInfo CLI(DAG);
123 CLI.setDebugLoc(dl).setChain(Chain)
124 .setCallee(TLI->getLibcallCallingConv(LC),
125 Type::getVoidTy(*DAG.getContext()),
126 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
127 TLI->getPointerTy()), std::move(Args), 0)
128 .setDiscardResult();
129 std::pair CallResult = TLI->LowerCallTo(CLI);
130
131 return CallResult.second;
24132 }
25133
26134 SDValue
41149 // within a subtarget-specific limit.
42150 ConstantSDNode *ConstantSize = dyn_cast(Size);
43151 if (!ConstantSize)
44 return SDValue();
152 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
153 RTLIB::MEMCPY);
45154 uint64_t SizeVal = ConstantSize->getZExtValue();
46155 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
47 return SDValue();
156 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
157 RTLIB::MEMCPY);
48158
49159 unsigned BytesLeft = SizeVal & 3;
50160 unsigned NumMemOps = SizeVal >> 2;
141251 makeArrayRef(TFOps, i));
142252 }
143253
144 // Adjust parameters for memset, EABI uses format (ptr, size, value),
145 // GNU library uses (ptr, value, size)
146 // See RTABI section 4.3.4
254
255 SDValue ARMSelectionDAGInfo::
256 EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
257 SDValue Chain,
258 SDValue Dst, SDValue Src,
259 SDValue Size, unsigned Align,
260 bool isVolatile,
261 MachinePointerInfo DstPtrInfo,
262 MachinePointerInfo SrcPtrInfo) const {
263 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
264 RTLIB::MEMMOVE);
265 }
266
267
147268 SDValue ARMSelectionDAGInfo::
148269 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
149270 SDValue Chain, SDValue Dst,
150271 SDValue Src, SDValue Size,
151272 unsigned Align, bool isVolatile,
152273 MachinePointerInfo DstPtrInfo) const {
153 const ARMSubtarget &Subtarget =
154 DAG.getMachineFunction().getSubtarget();
155 // Use default for non-AAPCS (or MachO) subtargets
156 if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() ||
157 Subtarget.isTargetWindows())
158 return SDValue();
159
160 const ARMTargetLowering &TLI = *Subtarget.getTargetLowering();
161 TargetLowering::ArgListTy Args;
162 TargetLowering::ArgListEntry Entry;
163
164 // First argument: data pointer
165 Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
166 Entry.Node = Dst;
167 Entry.Ty = IntPtrTy;
168 Args.push_back(Entry);
169
170 // Second argument: buffer size
171 Entry.Node = Size;
172 Entry.Ty = IntPtrTy;
173 Entry.isSExt = false;
174 Args.push_back(Entry);
175
176 // Extend or truncate the argument to be an i32 value for the call.
177 if (Src.getValueType().bitsGT(MVT::i32))
178 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
179 else
180 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
181
182 // Third argument: value to fill
183 Entry.Node = Src;
184 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
185 Entry.isSExt = true;
186 Args.push_back(Entry);
187
188 // Emit __eabi_memset call
189 TargetLowering::CallLoweringInfo CLI(DAG);
190 CLI.setDebugLoc(dl).setChain(Chain)
191 .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET),
192 Type::getVoidTy(*DAG.getContext()),
193 DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
194 TLI.getPointerTy()), std::move(Args), 0)
195 .setDiscardResult();
196
197 std::pair CallResult = TLI.LowerCallTo(CLI);
198 return CallResult.second;
199 }
274 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
275 RTLIB::MEMSET);
276 }
4747 MachinePointerInfo DstPtrInfo,
4848 MachinePointerInfo SrcPtrInfo) const override;
4949
50 SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
51 SDValue Chain,
52 SDValue Dst, SDValue Src,
53 SDValue Size, unsigned Align, bool isVolatile,
54 MachinePointerInfo DstPtrInfo,
55 MachinePointerInfo SrcPtrInfo) const override;
56
5057 // Adjust parameters for memset, see RTABI section 4.3.4
5158 SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
5259 SDValue Chain,
5461 SDValue Op3, unsigned Align,
5562 bool isVolatile,
5663 MachinePointerInfo DstPtrInfo) const override;
64
65 SDValue EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
66 SDValue Chain,
67 SDValue Dst, SDValue Src,
68 SDValue Size, unsigned Align,
69 RTLIB::Libcall LC) const;
5770 };
5871
5972 }
1717 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
1818
1919 ; EABI memset swaps arguments
20 ; CHECK-IOS: mov r1, #1
21 ; CHECK-IOS: memset
22 ; CHECK-DARWIN: movs r1, #1
23 ; CHECK-DARWIN: memset
24 ; CHECK-EABI: mov r2, #1
25 ; CHECK-EABI: __aeabi_memset
26 call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false)
27
28 ; EABI uses memclr if value set to 0
2029 ; CHECK-IOS: mov r1, #0
2130 ; CHECK-IOS: memset
2231 ; CHECK-DARWIN: movs r1, #0
2332 ; CHECK-DARWIN: memset
24 ; CHECK-EABI: mov r2, #0
25 ; CHECK-EABI: __aeabi_memset
33 ; CHECK-EABI: __aeabi_memclr
2634 call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false)
35
36 ; EABI uses aligned function variants if possible
37
38 ; CHECK-IOS: memmove
39 ; CHECK-DARWIN: memmove
40 ; CHECK-EABI: __aeabi_memmove4
41 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
42
43 ; CHECK-IOS: memcpy
44 ; CHECK-DARWIN: memcpy
45 ; CHECK-EABI: __aeabi_memcpy4
46 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
47
48 ; CHECK-IOS: memset
49 ; CHECK-DARWIN: memset
50 ; CHECK-EABI: __aeabi_memset4
51 call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 4, i1 false)
52
53 ; CHECK-IOS: memset
54 ; CHECK-DARWIN: memset
55 ; CHECK-EABI: __aeabi_memclr4
56 call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 4, i1 false)
57
58 ; CHECK-IOS: memmove
59 ; CHECK-DARWIN: memmove
60 ; CHECK-EABI: __aeabi_memmove8
61 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
62
63 ; CHECK-IOS: memcpy
64 ; CHECK-DARWIN: memcpy
65 ; CHECK-EABI: __aeabi_memcpy8
66 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
67
68 ; CHECK-IOS: memset
69 ; CHECK-DARWIN: memset
70 ; CHECK-EABI: __aeabi_memset8
71 call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 8, i1 false)
72
73 ; CHECK-IOS: memset
74 ; CHECK-DARWIN: memset
75 ; CHECK-EABI: __aeabi_memclr8
76 call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 8, i1 false)
77
2778 unreachable
2879 }
2980
52103 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
53104
54105 ; CHECK-IOS: mov r0, sp
55 ; CHECK-IOS: mov r1, #0
56 ; CHECK-IOS: memset
57 ; CHECK-DARINW: add r0, sp, #4
58 ; CHECK-DARWIN: movs r1, #0
106 ; CHECK-IOS: mov r1, #1
107 ; CHECK-IOS: memset
108 ; CHECK-DARWIN: add r0, sp, #4
109 ; CHECK-DARWIN: movs r1, #1
59110 ; CHECK-DARWIN: memset
60111 ; CHECK-EABI: add r0, sp, #4
61 ; CHECK-EABI: mov r2, #0
112 ; CHECK-EABI: mov r2, #1
62113 ; CHECK-EABI: __aeabi_memset
63114 %arr2 = alloca [9 x i8], align 1
64115 %2 = bitcast [9 x i8]* %arr2 to i8*
65 call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
116 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
66117
67118 unreachable
68119 }
89140 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
90141
91142 ; CHECK: {{add(.w)? r0, sp, #3}}
92 ; CHECK-IOS: mov r1, #0
93 ; CHECK-IOS: memset
94 ; CHECK-DARWIN: movs r1, #0
95 ; CHECK-DARWIN: memset
96 ; CHECK-EABI: mov r2, #0
143 ; CHECK-IOS: mov r1, #1
144 ; CHECK-IOS: memset
145 ; CHECK-DARWIN: movs r1, #1
146 ; CHECK-DARWIN: memset
147 ; CHECK-EABI: mov r2, #1
97148 ; CHECK-EABI: __aeabi_memset
98149 %arr2 = alloca [7 x i8], align 1
99150 %2 = bitcast [7 x i8]* %arr2 to i8*
100 call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
151 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
101152
102153 unreachable
103154 }
124175 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
125176
126177 ; CHECK: {{add(.w)? r., sp, #(1|5)}}
127 ; CHECK-IOS: mov r1, #0
128 ; CHECK-IOS: memset
129 ; CHECK-DARWIN: movs r1, #0
130 ; CHECK-DARWIN: memset
131 ; CHECK-EABI: mov r2, #0
178 ; CHECK-IOS: mov r1, #1
179 ; CHECK-IOS: memset
180 ; CHECK-DARWIN: movs r1, #1
181 ; CHECK-DARWIN: memset
182 ; CHECK-EABI: mov r2, #1
132183 ; CHECK-EABI: __aeabi_memset
133184 %arr2 = alloca [9 x i8], align 1
134185 %2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4
135 call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
186 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
136187
137188 unreachable
138189 }
159210 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
160211
161212 ; CHECK: {{add(.w)? r., sp, #(1|5)}}
162 ; CHECK-IOS: mov r1, #0
163 ; CHECK-IOS: memset
164 ; CHECK-DARWIN: movs r1, #0
165 ; CHECK-DARWIN: memset
166 ; CHECK-EABI: mov r2, #0
213 ; CHECK-IOS: mov r1, #1
214 ; CHECK-IOS: memset
215 ; CHECK-DARWIN: movs r1, #1
216 ; CHECK-DARWIN: memset
217 ; CHECK-EABI: mov r2, #1
167218 ; CHECK-EABI: __aeabi_memset
168219 %arr2 = alloca [13 x i8], align 1
169220 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1
170 call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
221 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
171222
172223 unreachable
173224 }
194245 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
195246
196247 ; CHECK: {{add(.w)? r., sp, #(1|5)}}
197 ; CHECK-IOS: mov r1, #0
198 ; CHECK-IOS: memset
199 ; CHECK-DARWIN: movs r1, #0
200 ; CHECK-DARWIN: memset
201 ; CHECK-EABI: mov r2, #0
248 ; CHECK-IOS: mov r1, #1
249 ; CHECK-IOS: memset
250 ; CHECK-DARWIN: movs r1, #1
251 ; CHECK-DARWIN: memset
252 ; CHECK-EABI: mov r2, #1
202253 ; CHECK-EABI: __aeabi_memset
203254 %arr2 = alloca [13 x i8], align 1
204255 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i
205 call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
256 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
206257
207258 unreachable
208259 }
229280 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
230281
231282 ; CHECK: {{add(.w)? r., sp, #(1|5)}}
232 ; CHECK-IOS: mov r1, #0
233 ; CHECK-IOS: memset
234 ; CHECK-DARWIN: movs r1, #0
235 ; CHECK-DARWIN: memset
236 ; CHECK-EABI: mov r2, #0
283 ; CHECK-IOS: mov r1, #1
284 ; CHECK-IOS: memset
285 ; CHECK-DARWIN: movs r1, #1
286 ; CHECK-DARWIN: memset
287 ; CHECK-EABI: mov r2, #1
237288 ; CHECK-EABI: __aeabi_memset
238289 %arr2 = alloca [13 x i8], align 1
239290 %2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4
240 call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
291 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
241292
242293 unreachable
243294 }
264315 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
265316
266317 ; CHECK: {{add(.w)? r., sp, #(1|5)}}
267 ; CHECK-IOS: mov r1, #0
268 ; CHECK-IOS: memset
269 ; CHECK-DARWIN: movs r1, #0
270 ; CHECK-DARWIN: memset
271 ; CHECK-EABI: mov r2, #0
318 ; CHECK-IOS: mov r1, #1
319 ; CHECK-IOS: memset
320 ; CHECK-DARWIN: movs r1, #1
321 ; CHECK-DARWIN: memset
322 ; CHECK-EABI: mov r2, #1
272323 ; CHECK-EABI: __aeabi_memset
273324 %arr2 = alloca [13 x i8], align 1
274325 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16
275 call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
326 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
276327
277328 unreachable
278329 }