llvm.org GIT mirror llvm / de9e4c7
Fix sfence, lfence, mfence, and clflush so that they can be selected when AVX is enabled. Fix monitor and mwait to require SSE3 or AVX; previously they worked even if SSE3 was disabled. Make prefetch instructions not set the execution domain, since they don't use XMM registers.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147409 91177308-0d34-0410-b5e6-96231b3b80d8
Craig Topper, 8 years ago
5 changed file(s) with 72 addition(s) and 23 deletion(s). Raw diff Collapse all Expand all
338338 list pattern>
339339 : I, TB,
340340 Requires<[HasAVX]>;
341 class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
342 list<dag> pattern>
343 : I<o, F, outs, ins, asm, pattern>, TB,
344 Requires<[HasXMM]>;
345341
346342 // SSE2 Instruction Templates:
347343 //
474474 def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
475475 def HasXMM : Predicate<"Subtarget->hasXMM()">;
476476 def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
477 def HasSSE3orAVX : Predicate<"Subtarget->hasSSE3orAVX()">;
477478
478479 def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
479480 def HasAES : Predicate<"Subtarget->hasAES()">;
32523252 //===----------------------------------------------------------------------===//
32533253
32543254 // Prefetch intrinsic.
3255 def PREFETCHT0 : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src),
3256 "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
3257 def PREFETCHT1 : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src),
3258 "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
3259 def PREFETCHT2 : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src),
3260 "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
3261 def PREFETCHNTA : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src),
3262 "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
3255 let Predicates = [HasXMM] in {
3256 def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
3257 "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
3258 def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
3259 "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
3260 def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
3261 "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
3262 def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
3263 "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
3264 }
32633265
32643266 // Flush cache
32653267 def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
32663268 "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
3267 TB, Requires<[HasSSE2]>;
3269 TB, Requires<[HasXMMInt]>;
32683270
32693271 // Pause. This "instruction" is encoded as "rep; nop", so even though it
32703272 // was introduced with SSE2, it's backward compatible.
32723274
32733275 // Load, store, and memory fence
32743276 def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
3275 "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
3277 "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasXMM]>;
32763278 def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
3277 "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
3279 "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasXMMInt]>;
32783280 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
3279 "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
3281 "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasXMMInt]>;
32803282
32813283 def : Pat<(X86SFence), (SFENCE)>;
32823284 def : Pat<(X86LFence), (LFENCE)>;
54625464
54635465 let usesCustomInserter = 1 in {
54645466 def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
5465 [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
5467 [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
5468 Requires<[HasSSE3orAVX]>;
54665469 def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
5467 [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
5470 [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
5471 Requires<[HasSSE3orAVX]>;
54685472 }
54695473
54705474 let Uses = [EAX, ECX, EDX] in
54715475 def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
5472 Requires<[HasSSE3]>;
5476 Requires<[HasSSE3orAVX]>;
54735477 let Uses = [ECX, EAX] in
54745478 def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
5475 Requires<[HasSSE3]>;
5479 Requires<[HasSSE3orAVX]>;
54765480
54775481 def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
54785482 def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
0 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
0 ; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse3 | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64
22 ; PR8573
33
44 ; CHECK: foo:
24802480 }
24812481 declare void @llvm.x86.avx.vzeroupper() nounwind
24822482
2483
2483 ; Make sure instructions that have no AVX equivalents, but are associated with SSEx feature flags, still work.
2484
2485 ; CHECK: monitor
2486 define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
2487 entry:
2488 tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
2489 ret void
2490 }
2491 declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
2492
2493 ; CHECK: mwait
2494 define void @mwait(i32 %E, i32 %H) nounwind {
2495 entry:
2496 tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
2497 ret void
2498 }
2499 declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
2500
2501 ; CHECK: sfence
2502 define void @sfence() nounwind {
2503 entry:
2504 tail call void @llvm.x86.sse.sfence()
2505 ret void
2506 }
2507 declare void @llvm.x86.sse.sfence() nounwind
2508
2509 ; CHECK: lfence
2510 define void @lfence() nounwind {
2511 entry:
2512 tail call void @llvm.x86.sse2.lfence()
2513 ret void
2514 }
2515 declare void @llvm.x86.sse2.lfence() nounwind
2516
2517 ; CHECK: mfence
2518 define void @mfence() nounwind {
2519 entry:
2520 tail call void @llvm.x86.sse2.mfence()
2521 ret void
2522 }
2523 declare void @llvm.x86.sse2.mfence() nounwind
2524
2525 ; CHECK: clflush
2526 define void @clflush(i8* %p) nounwind {
2527 entry:
2528 tail call void @llvm.x86.sse2.clflush(i8* %p)
2529 ret void
2530 }
2531 declare void @llvm.x86.sse2.clflush(i8*) nounwind