llvm.org GIT mirror llvm / ccb6976
Do not select (isel) load-folding bt instructions for Pentium M, Core, Core 2, and AMD processors. On these processors a bt with a memory operand is significantly slower than a load followed by a bt of a register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61557 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 11 years ago
5 changed file(s) with 53 addition(s) and 29 deletion(s). Raw diff Collapse all Expand all
4747 def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
4848 "Support 64-bit instructions",
4949 [FeatureSSE2]>;
50 def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
51 "Bit testing of memory is slow">;
5052
5153 //===----------------------------------------------------------------------===//
5254 // X86 processors supported.
6567 def : Proc<"pentiumpro", []>;
6668 def : Proc<"pentium2", [FeatureMMX]>;
6769 def : Proc<"pentium3", [FeatureSSE1]>;
68 def : Proc<"pentium-m", [FeatureSSE2]>;
70 def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
6971 def : Proc<"pentium4", [FeatureSSE2]>;
70 def : Proc<"x86-64", [Feature64Bit]>;
71 def : Proc<"yonah", [FeatureSSE3]>;
72 def : Proc<"prescott", [FeatureSSE3]>;
73 def : Proc<"nocona", [FeatureSSE3, Feature64Bit]>;
74 def : Proc<"core2", [FeatureSSSE3, Feature64Bit]>;
75 def : Proc<"penryn", [FeatureSSE41, Feature64Bit]>;
72 def : Proc<"x86-64", [Feature64Bit, FeatureSlowBTMem]>;
73 def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
74 def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
75 def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
76 def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
77 def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
7678
7779 def : Proc<"k6", [FeatureMMX]>;
7880 def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
7981 def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
80 def : Proc<"athlon", [FeatureMMX, Feature3DNowA]>;
81 def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA]>;
82 def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA]>;
83 def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA]>;
84 def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA]>;
85 def : Proc<"k8", [Feature3DNowA, Feature64Bit]>;
86 def : Proc<"opteron", [Feature3DNowA, Feature64Bit]>;
87 def : Proc<"athlon64", [Feature3DNowA, Feature64Bit]>;
88 def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit]>;
82 def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
83 def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
84 def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
85 def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
86 def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
87 def : Proc<"k8", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
88 def : Proc<"opteron", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
89 def : Proc<"athlon64", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
90 def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
8991
9092 def : Proc<"winchip-c6", [FeatureMMX]>;
9193 def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
221221 def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
222222 def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
223223 def OptForSpeed : Predicate<"!OptForSize">;
224 def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
224225
225226 //===----------------------------------------------------------------------===//
226227 // X86 Instruction Format Definitions.
26652666 def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
26662667 "bt{w}\t{$src2, $src1|$src1, $src2}",
26672668 [(X86bt (loadi16 addr:$src1), GR16:$src2),
2668 (implicit EFLAGS)]>, OpSize, TB;
2669 (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
26692670 def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
26702671 "bt{l}\t{$src2, $src1|$src1, $src2}",
26712672 [(X86bt (loadi32 addr:$src1), GR32:$src2),
2672 (implicit EFLAGS)]>, TB;
2673 (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
26732674 } // Defs = [EFLAGS]
26742675
26752676 // Sign/Zero extenders
148148 return true;
149149 }
150150
/// DetectFamilyModel - Decode the processor family and model numbers from a
/// CPUID EAX value and store them in Family and Model. (One visible caller
/// passes the EAX filled in by CPUID leaf 0x1.)
///
/// The base family and base model are 4-bit fields. The extended IDs are only
/// consulted when the *base* family is 6 or 0xF: the extended model ID then
/// becomes the high nibble of Model, and, for base family 0xF only, the
/// extended family ID is added to Family.
151 static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
152 Family = (EAX >> 8) & 0xf; // Base family ID: bits 8 - 11
153 Model = (EAX >> 4) & 0xf; // Base model ID: bits 4 - 7
154 if (Family == 6 || Family == 0xf) {
155 if (Family == 0xf)
156 // Add the extended family ID only if the base family ID is F.
157 Family += (EAX >> 20) & 0xff; // Extended family ID: bits 20 - 27
158 // Fold in the extended model ID (as the high nibble) if the base family
// ID is 6 or F.
159 Model += ((EAX >> 16) & 0xf) << 4; // Extended model ID: bits 16 - 19
160 }
161 }
162
151163 void X86Subtarget::AutoDetectSubtargetFeatures() {
152164 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
153165 union {
168180 if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
169181 if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
170182
171 if (memcmp(text.c, "GenuineIntel", 12) == 0 ||
172 memcmp(text.c, "AuthenticAMD", 12) == 0) {
183 bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
184 bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
185 if (IsIntel || IsAMD) {
186 // Determine if bit test memory instructions are slow.
187 unsigned Family = 0;
188 unsigned Model = 0;
189 DetectFamilyModel(EAX, Family, Model);
190 IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
191
173192 X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
174193 HasX86_64 = (EDX >> 29) & 0x1;
175194 }
179198 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
180199 if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
181200 return "generic";
182 unsigned Family = (EAX >> 8) & 0xf; // Bits 8 - 11
183 unsigned Model = (EAX >> 4) & 0xf; // Bits 4 - 7
184 if (Family == 6 || Family == 0xf) {
185 if (Family == 0xf)
186 // Examine extended family ID if family ID is F.
187 Family += (EAX >> 20) & 0xff; // Bits 20 - 27
188 // Examine extended model ID if family ID is 6 or F.
189 Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
190 }
201 unsigned Family = 0;
202 unsigned Model = 0;
203 DetectFamilyModel(EAX, Family, Model);
191204
192205 X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
193206 bool Em64T = (EDX >> 29) & 0x1;
284297 , X86SSELevel(NoMMXSSE)
285298 , X863DNowLevel(NoThreeDNow)
286299 , HasX86_64(false)
300 , IsBTMemSlow(false)
287301 , DarwinVers(0)
288302 , IsLinux(false)
289303 , stackAlignment(8)
6363 /// HasX86_64 - True if the processor supports X86-64 instructions.
6464 ///
6565 bool HasX86_64;
66
67 /// IsBTMemSlow - True if BT (bit test) instructions with a memory operand are slow.
68 bool IsBTMemSlow;
6669
6770 /// DarwinVers - Nonzero if this is a darwin platform: the numeric
6871 /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
125128 bool hasSSE42() const { return X86SSELevel >= SSE42; }
126129 bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
127130 bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
131
132 bool isBTMemSlow() const { return IsBTMemSlow; }
128133
129134 unsigned getAsmFlavor() const {
130135 return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
0 ; RUN: llvm-as < %s | llc | grep btl
1 ; RUN: llvm-as < %s | llc -mcpu=pentium4 | grep btl | grep esp
2 ; RUN: llvm-as < %s | llc -mcpu=penryn | grep btl | not grep esp
13 ; PR3253
24 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
35 target triple = "i386-apple-darwin8"