llvm.org GIT mirror llvm / 231d4f5
Merge r227983 (along with r227815 and r227972 to make it apply cleanly)

------------------------------------------------------------------------
r227815 | spatel | 2015-02-02 09:47:30 -0800 (Mon, 02 Feb 2015) | 2 lines

fix typo
------------------------------------------------------------------------

------------------------------------------------------------------------
r227972 | spatel | 2015-02-03 07:37:18 -0800 (Tue, 03 Feb 2015) | 10 lines

Improve test to actually check for a folded load.

This test was checking for lack of a "movaps" (an aligned load) rather than
a "movups" (an unaligned load). It also included a store which complicated
the checking.

Add specific CPU runs to prevent subtarget feature flag overrides from
inhibiting this optimization.
------------------------------------------------------------------------

------------------------------------------------------------------------
r227983 | spatel | 2015-02-03 09:13:04 -0800 (Tue, 03 Feb 2015) | 22 lines

Fix program crashes due to alignment exceptions generated for SSE memop
instructions (PR22371).

r224330 introduced a bug by misinterpreting the "FeatureVectorUAMem" bit.
The commit log says that change did not affect anything, but that's not
correct. That change allowed SSE instructions to have unaligned mem operands
folded into math ops, and that's not allowed in the default specification for
any SSE variant.

The bug is exposed when compiling for an AVX-capable CPU that had this
feature flag but without enabling AVX codegen. Another mistake in r224330 was
not adding the feature flag to all AVX CPUs; the AMD chips were excluded.

This is part of the fix for PR22371 ( http://llvm.org/bugs/show_bug.cgi?id=22371 ).

This feature bit is SSE-specific, so I've renamed it to
"FeatureSSEUnalignedMem". Changed the existing test case for the feature bit
to reflect the new name and renamed the test file itself to better reflect
the feature. Added runs to fold-vex.ll to check for the failing codegen.

Note that the feature bit is not set by default on any CPU because it may
require a configuration register setting to enable the enhanced unaligned
behavior.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228323 91177308-0d34-0410-b5e6-96231b3b80d8

Hans Wennborg committed 5 years ago
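To make the failure mode concrete (an illustrative sketch, not code from this commit): baseline SSE encodings such as andps/pand require a 16-byte-aligned memory operand, so folding an under-aligned load into the math op can raise an alignment fault at run time, while keeping the load as a separate movups (or using the VEX-encoded vandps under AVX) is safe. A minimal hypothetical reproducer in the IR style of the tests below:

; Hypothetical reproducer (not part of this commit). Compiled without AVX,
; the 4-byte-aligned load below must NOT be folded into a plain SSE and op.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx
define <4 x i32> @under_aligned_and(<4 x i32>* %p, <4 x i32> %x) nounwind {
  %v = load <4 x i32>* %p, align 4     ; only 4-byte alignment is guaranteed
  %a = and <4 x i32> %v, %x            ; safe codegen keeps a separate movups
  ret <4 x i32> %a
}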
7 changed files with 51 additions and 38 deletions.
lib/Target/X86/X86.td

@@ -131,9 +131,9 @@
 def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
                                   "Enable XOP instructions",
                                   [FeatureFMA4]>;
-def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
-                                          "HasVectorUAMem", "true",
-          "Allow unaligned memory operands on vector/SIMD instructions">;
+def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
+                                              "HasSSEUnalignedMem", "true",
+          "Allow unaligned memory operands with SSE instructions">;
 def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
                                   "Enable AES instructions",
                                   [FeatureSSE2]>;
@@ -308,7 +308,6 @@
                   FeatureCMPXCHG16B,
                   FeatureFastUAMem,
                   FeatureSlowUAMem32,
-                  FeatureVectorUAMem,
                   FeaturePOPCNT,
                   FeatureAES,
                   FeaturePCLMUL
@@ -321,7 +320,6 @@
                   FeatureCMPXCHG16B,
                   FeatureFastUAMem,
                   FeatureSlowUAMem32,
-                  FeatureVectorUAMem,
                   FeaturePOPCNT,
                   FeatureAES,
                   FeaturePCLMUL,
@@ -336,7 +334,6 @@
                   FeatureAVX2,
                   FeatureCMPXCHG16B,
                   FeatureFastUAMem,
-                  FeatureVectorUAMem,
                   FeaturePOPCNT,
                   FeatureAES,
                   FeaturePCLMUL,
@@ -359,7 +356,6 @@
                   FeatureAVX2,
                   FeatureCMPXCHG16B,
                   FeatureFastUAMem,
-                  FeatureVectorUAMem,
                   FeaturePOPCNT,
                   FeatureAES,
                   FeaturePCLMUL,
@@ -387,7 +383,7 @@
                   FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
                   FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                   FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
-                  FeatureSlowIncDec, FeatureVectorUAMem]>;
+                  FeatureSlowIncDec]>;
 def : KnightsLandingProc<"knl">;

 // FIXME: define SKX model
@@ -398,7 +394,7 @@
                   FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
                   FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                   FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
-                  FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>;
+                  FeatureSlowIncDec, FeatureSGX]>;
 def : SkylakeProc<"skylake">;
 def : SkylakeProc<"skx">; // Legacy alias.

lib/Target/X86/X86InstrFragmentsSIMD.td

@@ -423,7 +423,7 @@
 // setting a feature bit in the processor (on startup, for example).
 // Opteron 10h and later implement such a feature.
 def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return Subtarget->hasVectorUAMem()
+  return Subtarget->hasSSEUnalignedMem()
       || cast<LoadSDNode>(N)->getAlignment() >= 16;
 }]>;

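The renamed predicate keeps the long-standing rule for plain SSE: a load may be selected as a folded memory operand only when the subtarget opts in via the new bit or the load is known to be at least 16-byte aligned. A small hypothetical test (not part of the commit) for the aligned case, which should still fold even without sse-unaligned-mem:

; Hypothetical example: 16-byte alignment satisfies the memop predicate,
; so the load can be folded into the packed add on any SSE subtarget.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx | FileCheck %s
define <4 x float> @aligned_fold(<4 x float>* %p, <4 x float> %x) nounwind {
  %v = load <4 x float>* %p, align 16
  %s = fadd <4 x float> %v, %x
  ret <4 x float> %s
; CHECK-LABEL: @aligned_fold
; CHECK: addps (%rdi), %xmm0
}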
lib/Target/X86/X86Subtarget.cpp

@@ -264,7 +264,7 @@
   IsSHLDSlow = false;
   IsUAMemFast = false;
   IsUAMem32Slow = false;
-  HasVectorUAMem = false;
+  HasSSEUnalignedMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
   HasSlowDivide32 = false;
lib/Target/X86/X86Subtarget.h

@@ -161,9 +161,9 @@
   /// True if unaligned 32-byte memory accesses are slow.
   bool IsUAMem32Slow;

-  /// HasVectorUAMem - True if SIMD operations can have unaligned memory
-  /// operands. This may require setting a feature bit in the processor.
-  bool HasVectorUAMem;
+  /// True if SSE operations can have unaligned memory operands.
+  /// This may require setting a configuration bit in the processor.
+  bool HasSSEUnalignedMem;

   /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
   /// this is true for most x86-64 chips, but not the first AMD chips.
@@ -377,7 +377,7 @@
   bool isSHLDSlow() const { return IsSHLDSlow; }
   bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
-  bool hasVectorUAMem() const { return HasVectorUAMem; }
+  bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
test/CodeGen/X86/2010-01-07-UAMemFeature.ll (deleted: 0 additions, 11 deletions)

-; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
-; CHECK: addps (
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
-  %A = load <4 x float>* %P, align 4
-  %B = fadd <4 x float> %A, %In
-  ret <4 x float> %B
-}
test/CodeGen/X86/fold-vex.ll

-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.

-;CHECK: @test
-; No need to load from memory. The operand will be loaded as part of th AND instr.
-;CHECK-NOT: vmovaps
-;CHECK: vandps
-;CHECK: ret
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefix=SSE

-define void @test1(<8 x i32>* %p0, <8 x i32> %in1) nounwind {
-entry:
-  %in0 = load <8 x i32>* %p0, align 2
-  %a = and <8 x i32> %in0, %in1
-  store <8 x i32> %a, <8 x i32>* undef
-  ret void
+; No need to load unaligned operand from memory using an explicit instruction with AVX.
+; The operand should be folded into the AND instr.
+
+; With SSE, folding memory operands into math/logic ops requires 16-byte alignment
+; unless specially configured on some CPUs such as AMD Family 10H.
+
+define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
+  %in0 = load <4 x i32>* %p0, align 2
+  %a = and <4 x i32> %in0, %in1
+  ret <4 x i32> %a
+
+; CHECK-LABEL: @test1
+; CHECK-NOT:  vmovups
+; CHECK:      vandps (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+; SSE-LABEL: @test1
+; SSE:       movups (%rdi), %xmm1
+; SSE-NEXT:  andps %xmm1, %xmm0
+; SSE-NEXT:  ret
 }

test/CodeGen/X86/sse-unaligned-mem-feature.ll (new file, renamed from the deleted test above)

+; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem -march=x86 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
+  %A = load <4 x float>* %P, align 4
+  %B = fadd <4 x float> %A, %In
+  ret <4 x float> %B
+
+; CHECK-LABEL: @foo
+; CHECK: addps (
+}
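Conversely, without the attribute the same 4-byte-aligned load has to stay a separate unaligned move rather than being folded. A hypothetical companion test (not part of the commit) that checks the default behavior could look like:

; Hypothetical companion test: with the default feature set the under-aligned
; load may not be folded, so the memory-operand form of addps should not appear.
; RUN: llc -mcpu=yonah -march=x86 < %s | FileCheck %s
define <4 x float> @foo_default(<4 x float>* %P, <4 x float> %In) nounwind {
  %A = load <4 x float>* %P, align 4
  %B = fadd <4 x float> %A, %In
  ret <4 x float> %B
; CHECK-LABEL: @foo_default
; CHECK-NOT: addps (
}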