llvm.org GIT mirror llvm / 8ec862f
[X86] Mark EMMS and FEMMS as clobbering MM0-7 and ST0-7. This fixes the test case in PR35982 by preventing MMX instructions that read MM0-7 from being moved below EMMS/FEMMS by the post-RA scheduler. However, as discussed in Bugzilla, this is not a complete fix: there is still the possibility of reordering in IR or by the pre-RA scheduler. Differential Revision: https://reviews.llvm.org/D57298 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352660 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 1 year, 24 days ago
3 changed file(s) with 48 addition(s) and 88 deletion(s). Raw diff Collapse all Expand all
7272 defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
7373 defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
7474
75 let SchedRW = [WriteEMMS] in
75 let SchedRW = [WriteEMMS],
76 Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
77 ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
7678 def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
7779 [(int_x86_mmx_femms)]>, TB;
7880
151151 // MMX EMMS Instruction
152152 //===----------------------------------------------------------------------===//
153153
154 let SchedRW = [WriteEMMS] in
154 let SchedRW = [WriteEMMS],
155 Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
156 ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
155157 def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
156158
157159 //===----------------------------------------------------------------------===//
22 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=true | FileCheck %s --check-prefixes=CHECK,POST
33
44 define float @PR35982_emms(<1 x i64>) nounwind {
5 ; NOPOST-LABEL: PR35982_emms:
6 ; NOPOST: # %bb.0:
7 ; NOPOST-NEXT: pushl %ebp
8 ; NOPOST-NEXT: movl %esp, %ebp
9 ; NOPOST-NEXT: andl $-8, %esp
10 ; NOPOST-NEXT: subl $16, %esp
11 ; NOPOST-NEXT: movl 8(%ebp), %eax
12 ; NOPOST-NEXT: movl 12(%ebp), %ecx
13 ; NOPOST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
14 ; NOPOST-NEXT: movl %eax, {{[0-9]+}}(%esp)
15 ; NOPOST-NEXT: movq {{[0-9]+}}(%esp), %mm0
16 ; NOPOST-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
17 ; NOPOST-NEXT: movd %mm0, %ecx
18 ; NOPOST-NEXT: emms
19 ; NOPOST-NEXT: movl %eax, (%esp)
20 ; NOPOST-NEXT: fildl (%esp)
21 ; NOPOST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22 ; NOPOST-NEXT: fiaddl {{[0-9]+}}(%esp)
23 ; NOPOST-NEXT: movl %ebp, %esp
24 ; NOPOST-NEXT: popl %ebp
25 ; NOPOST-NEXT: retl
26 ;
27 ; POST-LABEL: PR35982_emms:
28 ; POST: # %bb.0:
29 ; POST-NEXT: pushl %ebp
30 ; POST-NEXT: movl %esp, %ebp
31 ; POST-NEXT: andl $-8, %esp
32 ; POST-NEXT: subl $16, %esp
33 ; POST-NEXT: movl 8(%ebp), %eax
34 ; POST-NEXT: movl 12(%ebp), %ecx
35 ; POST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
36 ; POST-NEXT: movl %eax, {{[0-9]+}}(%esp)
37 ; POST-NEXT: movq {{[0-9]+}}(%esp), %mm0
38 ; POST-NEXT: emms
39 ; POST-NEXT: movl %eax, (%esp)
40 ; POST-NEXT: fildl (%esp)
41 ; POST-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
42 ; POST-NEXT: movd %mm0, %ecx
43 ; POST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
44 ; POST-NEXT: fiaddl {{[0-9]+}}(%esp)
45 ; POST-NEXT: movl %ebp, %esp
46 ; POST-NEXT: popl %ebp
47 ; POST-NEXT: retl
5 ; CHECK-LABEL: PR35982_emms:
6 ; CHECK: # %bb.0:
7 ; CHECK-NEXT: pushl %ebp
8 ; CHECK-NEXT: movl %esp, %ebp
9 ; CHECK-NEXT: andl $-8, %esp
10 ; CHECK-NEXT: subl $16, %esp
11 ; CHECK-NEXT: movl 8(%ebp), %eax
12 ; CHECK-NEXT: movl 12(%ebp), %ecx
13 ; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
14 ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
15 ; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
16 ; CHECK-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
17 ; CHECK-NEXT: movd %mm0, %ecx
18 ; CHECK-NEXT: emms
19 ; CHECK-NEXT: movl %eax, (%esp)
20 ; CHECK-NEXT: fildl (%esp)
21 ; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22 ; CHECK-NEXT: fiaddl {{[0-9]+}}(%esp)
23 ; CHECK-NEXT: movl %ebp, %esp
24 ; CHECK-NEXT: popl %ebp
25 ; CHECK-NEXT: retl
4826 %2 = bitcast <1 x i64> %0 to <2 x i32>
4927 %3 = extractelement <2 x i32> %2, i32 0
5028 %4 = extractelement <1 x i64> %0, i32 0
6038 }
6139
6240 define float @PR35982_femms(<1 x i64>) nounwind {
63 ; NOPOST-LABEL: PR35982_femms:
64 ; NOPOST: # %bb.0:
65 ; NOPOST-NEXT: pushl %ebp
66 ; NOPOST-NEXT: movl %esp, %ebp
67 ; NOPOST-NEXT: andl $-8, %esp
68 ; NOPOST-NEXT: subl $16, %esp
69 ; NOPOST-NEXT: movl 8(%ebp), %eax
70 ; NOPOST-NEXT: movl 12(%ebp), %ecx
71 ; NOPOST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
72 ; NOPOST-NEXT: movl %eax, {{[0-9]+}}(%esp)
73 ; NOPOST-NEXT: movq {{[0-9]+}}(%esp), %mm0
74 ; NOPOST-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
75 ; NOPOST-NEXT: movd %mm0, %ecx
76 ; NOPOST-NEXT: femms
77 ; NOPOST-NEXT: movl %eax, (%esp)
78 ; NOPOST-NEXT: fildl (%esp)
79 ; NOPOST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
80 ; NOPOST-NEXT: fiaddl {{[0-9]+}}(%esp)
81 ; NOPOST-NEXT: movl %ebp, %esp
82 ; NOPOST-NEXT: popl %ebp
83 ; NOPOST-NEXT: retl
84 ;
85 ; POST-LABEL: PR35982_femms:
86 ; POST: # %bb.0:
87 ; POST-NEXT: pushl %ebp
88 ; POST-NEXT: movl %esp, %ebp
89 ; POST-NEXT: andl $-8, %esp
90 ; POST-NEXT: subl $16, %esp
91 ; POST-NEXT: movl 8(%ebp), %eax
92 ; POST-NEXT: movl 12(%ebp), %ecx
93 ; POST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
94 ; POST-NEXT: movl %eax, {{[0-9]+}}(%esp)
95 ; POST-NEXT: movq {{[0-9]+}}(%esp), %mm0
96 ; POST-NEXT: femms
97 ; POST-NEXT: movl %eax, (%esp)
98 ; POST-NEXT: fildl (%esp)
99 ; POST-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
100 ; POST-NEXT: movd %mm0, %ecx
101 ; POST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
102 ; POST-NEXT: fiaddl {{[0-9]+}}(%esp)
103 ; POST-NEXT: movl %ebp, %esp
104 ; POST-NEXT: popl %ebp
105 ; POST-NEXT: retl
41 ; CHECK-LABEL: PR35982_femms:
42 ; CHECK: # %bb.0:
43 ; CHECK-NEXT: pushl %ebp
44 ; CHECK-NEXT: movl %esp, %ebp
45 ; CHECK-NEXT: andl $-8, %esp
46 ; CHECK-NEXT: subl $16, %esp
47 ; CHECK-NEXT: movl 8(%ebp), %eax
48 ; CHECK-NEXT: movl 12(%ebp), %ecx
49 ; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
50 ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
51 ; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
52 ; CHECK-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
53 ; CHECK-NEXT: movd %mm0, %ecx
54 ; CHECK-NEXT: femms
55 ; CHECK-NEXT: movl %eax, (%esp)
56 ; CHECK-NEXT: fildl (%esp)
57 ; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
58 ; CHECK-NEXT: fiaddl {{[0-9]+}}(%esp)
59 ; CHECK-NEXT: movl %ebp, %esp
60 ; CHECK-NEXT: popl %ebp
61 ; CHECK-NEXT: retl
10662 %2 = bitcast <1 x i64> %0 to <2 x i32>
10763 %3 = extractelement <2 x i32> %2, i32 0
10864 %4 = extractelement <1 x i64> %0, i32 0