llvm.org GIT mirror llvm / 3b88355
X86 memcpy: use REPMOVSB instead of REPMOVS{Q,D,W} for inline copies when the subtarget has fast strings. This has two advantages: - Speed is improved. For example, on Haswell throughput improvements increase linearly with size from 256 to 512 bytes, after which they plateau: (e.g. 1% for 260 bytes, 25% for 400 bytes, 40% for 508 bytes). - Code is much smaller (no need to handle boundaries). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300957 91177308-0d34-0410-b5e6-96231b3b80d8 Clement Courbet 2 years ago
6 changed file(s) with 35 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
272272 "fast-shld-rotate", "HasFastSHLDRotate", "true",
273273 "SHLD can be used as a faster rotate">;
274274
275 // String operations (e.g. REP MOVS) are fast. See "REP String Enhancement" in
276 // the Intel Software Development Manual.
277 def FeatureFastString
278 : SubtargetFeature<
279 "fast-string", "HasFastString", "true",
280 "REP MOVS/STOS are fast">;
281
275282 //===----------------------------------------------------------------------===//
276283 // X86 processors supported.
277284 //===----------------------------------------------------------------------===//
497504 FeatureAVX2,
498505 FeatureBMI,
499506 FeatureBMI2,
507 FeatureFastString,
500508 FeatureFMA,
501509 FeatureLZCNT,
502510 FeatureMOVBE,
896896 def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
897897 def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
898898 def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
899 def HasFastString : Predicate<"Subtarget->hasFastString()">;
899900 def HasMFence : Predicate<"Subtarget->hasMFence()">;
900901
901902 //===----------------------------------------------------------------------===//
214214 return SDValue();
215215
216216 MVT AVT;
217 if (Align & 1)
217 if (Subtarget.hasFastString())
218 // If the target has fast strings, then it's at least as fast to use
219 // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
220 // BytesLeft.
221 AVT = MVT::i8;
222 else if (Align & 1)
218223 AVT = MVT::i8;
219224 else if (Align & 2)
220225 AVT = MVT::i16;
302302 HasFastVectorFSQRT = false;
303303 HasFastLZCNT = false;
304304 HasFastSHLDRotate = false;
305 HasFastString = false;
305306 HasSlowDivide32 = false;
306307 HasSlowDivide64 = false;
307308 PadShortFunctions = false;
230230
231231 /// True if SHLD based rotate is fast.
232232 bool HasFastSHLDRotate;
233
234 /// True if the processor has fast REP MOVS.
235 bool HasFastString;
233236
234237 /// True if the short functions should be padded to prevent
235238 /// a stall when returning too early.
471474 bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
472475 bool hasFastLZCNT() const { return HasFastLZCNT; }
473476 bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
477 bool hasFastString() const { return HasFastString; }
474478 bool hasSlowDivide32() const { return HasSlowDivide32; }
475479 bool hasSlowDivide64() const { return HasSlowDivide64; }
476480 bool padShortFunctions() const { return PadShortFunctions; }
0 ; RUN: llc -mtriple=x86_64-linux-gnu -mattr=-fast-string < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST
1 ; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+fast-string < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST
2
; A 4096-byte aggregate, large enough that the backend emits an inline
; rep;movs sequence for the byval copy rather than expanding it.
3 %struct.large = type { [4096 x i8] }
4
5 declare void @foo(%struct.large* align 8 byval) nounwind
6
; Passing %x byval forces a full copy of the struct at the call site.
; The CHECK lines verify the element width chosen for the copy:
; without the fast-string feature the 8-byte-aligned copy uses the widest
; element (rep;movsq); with +fast-string it uses rep;movsb, matching the
; hasFastString() path added to getMemcpyLoadsAndStores lowering.
7 define void @test1(%struct.large* nocapture %x) nounwind {
8 call void @foo(%struct.large* align 8 byval %x)
9 ret void
10
11 ; ALL-LABEL: test1:
12 ; NOFAST: rep;movsq
13 ; FAST: rep;movsb
14 }