llvm.org GIT mirror: llvm / 5fb1d32
[x86-64] allow mfence even with -mno-sse (PR23203)

As shown in: https://llvm.org/bugs/show_bug.cgi?id=23203
...we currently die because lowering believes that mfence is allowed without SSE2 on x86-64,
but the instruction def doesn't know that.

I don't know if allowing mfence without SSE is right, but if not, at least now it's
consistently wrong. :)

Differential Revision: http://reviews.llvm.org/D17219

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260828 91177308-0d34-0410-b5e6-96231b3b80d8

Sanjay Patel, 4 years ago
5 changed files with 42 additions and 15 deletions.
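For readers who want to see the failing scenario, here is a minimal sketch of the PR23203 case, assuming only what the commit message and the updated test state: a sequentially-consistent fence compiled for x86-64 with SSE2 explicitly disabled. The triple and -mattr flag mirror the new X64 RUN line in the test below; the file and function names are placeholders. Per the commit message, this combination previously died because lowering produced X86ISD::MFENCE while the MFENCE instruction def still required SSE2; after this change the same input selects mfence.

; repro.ll (hypothetical file name)
; Build command mirrors the new X64 RUN line in the test:
;   llc < repro.ll -mtriple=x86_64-unknown-unknown -mattr=-sse2

define void @seq_cst_fence() {
  ; A cross-thread seq_cst fence is the only fence that needs an instruction
  ; on x86; it is lowered to X86ISD::MFENCE and now matched via HasMFence.
  fence seq_cst
  ret void
}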
lib/Target/X86/X86ISelLowering.cpp

@@ -19716,13 +19716,6 @@
   }
 }
 
-static bool hasMFENCE(const X86Subtarget &Subtarget) {
-  // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
-  // no-sse2). There isn't any reason to disable it if the target processor
-  // supports it.
-  return Subtarget.hasSSE2() || Subtarget.is64Bit();
-}
-
 LoadInst *
 X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -19762,7 +19755,7 @@
     // the IR level, so we must wrap it in an intrinsic.
     return nullptr;
 
-  if (!hasMFENCE(Subtarget))
+  if (!Subtarget.hasMFence())
     // FIXME: it might make sense to use a locked operation here but on a
     // different cache-line to prevent cache-line bouncing. In practice it
     // is probably a small win, and x86 processors without mfence are rare
@@ -19793,7 +19786,7 @@
   // The only fence that needs an instruction is a sequentially-consistent
   // cross-thread fence.
   if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
-    if (hasMFENCE(Subtarget))
+    if (Subtarget.hasMFence())
       return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
 
     SDValue Chain = Op.getOperand(0);
lib/Target/X86/X86InstrInfo.td

@@ -844,6 +844,7 @@
 def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
 def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
 def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
+def HasMFence : Predicate<"Subtarget->hasMFence()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
lib/Target/X86/X86InstrSSE.td

@@ -3761,6 +3761,8 @@
 
 let SchedRW = [WriteFence] in {
 // Load, store, and memory fence
+// TODO: As with mfence, we may want to ease the availability of sfence/lfence
+// to include any 64-bit target.
 def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
                "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
                PS, Requires<[HasSSE1]>;
@@ -3769,7 +3771,7 @@
                TB, Requires<[HasSSE2]>;
 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
-               TB, Requires<[HasSSE2]>;
+               TB, Requires<[HasMFence]>;
 } // SchedRW
 
 def : Pat<(X86SFence), (SFENCE)>;
lib/Target/X86/X86Subtarget.h

@@ -445,6 +445,11 @@
   bool isSLM() const { return X86ProcFamily == IntelSLM; }
   bool useSoftFloat() const { return UseSoftFloat; }
 
+  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+  /// no-sse2). There isn't any reason to disable it if the target processor
+  /// supports it.
+  bool hasMFence() const { return hasSSE2() || is64Bit(); }
+
   const Triple &getTargetTriple() const { return TargetTriple; }
 
   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
test/CodeGen/X86/mfence.ll

@@ -1,11 +1,37 @@
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
+
+; It doesn't matter if an x86-64 target has specified "no-sse2"; we still can use mfence.
 
 define void @test() {
-; CHECK-LABEL: test:
-; CHECK: # BB#0:
-; CHECK-NEXT: mfence
-; CHECK-NEXT: retl
+; X32-LABEL: test:
+; X32: # BB#0:
+; X32-NEXT: mfence
+; X32-NEXT: retl
+;
+; X64-LABEL: test:
+; X64: # BB#0:
+; X64-NEXT: mfence
+; X64-NEXT: retq
   fence seq_cst
   ret void
 }
 
+define i32 @fence(i32* %ptr) {
+; X32-LABEL: fence:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: mfence
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: fence:
+; X64: # BB#0:
+; X64-NEXT: mfence
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: retq
+  %atomic = atomicrmw add i32* %ptr, i32 0 seq_cst
+  ret i32 %atomic
+}
+