llvm.org GIT mirror llvm / 73aa02e
[x86] set default reciprocal (division and square root) codegen to match GCC
D8982 (checked in at http://reviews.llvm.org/rL239001) added command-line options to allow reciprocal estimate instructions to be used in place of divisions and square roots. This patch changes the default settings for x86 targets to allow that recip codegen (except for scalar division, because that breaks too much code) when using -ffast-math or its equivalent. This matches GCC behavior for this kind of codegen.
Differential Revision: http://reviews.llvm.org/D10396
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240310 91177308-0d34-0410-b5e6-96231b3b80d8
Sanjay Patel, 4 years ago
3 changed file(s) with 63 addition(s) and 60 deletion(s). Raw diff Collapse all Expand all
109109 if (Subtarget.isTargetWin64())
110110 this->Options.TrapUnreachable = true;
111111
112 // TODO: By default, all reciprocal estimate operations are off because
113 // that matches the behavior before TargetRecip was added (except for btver2
114 // which used subtarget features to enable this type of codegen).
115 // We should change this to match GCC behavior where everything but
116 // scalar division estimates are turned on by default with -ffast-math.
117 this->Options.Reciprocals.setDefaults("all", false, 1);
112 // By default (and when -ffast-math is on), enable estimate codegen for
113 // everything except scalar division. By default, use 1 refinement step for
114 // all operations. Defaults may be overridden by using command-line options.
115 // Scalar division estimates are disabled because they break too much
116 // real-world code. These defaults match GCC behavior.
117 this->Options.Reciprocals.setDefaults("sqrtf", true, 1);
118 this->Options.Reciprocals.setDefaults("divf", false, 1);
119 this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1);
120 this->Options.Reciprocals.setDefaults("vec-divf", true, 1);
118121
119122 initAsmInfo();
120123 }
None ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
22 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE
33
1313 %div = fdiv fast float 1.0, %x
1414 ret float %div
1515
16 ; CHECK-LABEL: reciprocal_estimate:
17 ; CHECK: movss
18 ; CHECK-NEXT: divss
19 ; CHECK-NEXT: movaps
20 ; CHECK-NEXT: retq
16 ; NORECIP-LABEL: reciprocal_estimate:
17 ; NORECIP: movss
18 ; NORECIP-NEXT: divss
19 ; NORECIP-NEXT: movaps
20 ; NORECIP-NEXT: retq
2121
2222 ; RECIP-LABEL: reciprocal_estimate:
2323 ; RECIP: vrcpss
4444 %div = fdiv fast <4 x float> , %x
4545 ret <4 x float> %div
4646
47 ; CHECK-LABEL: reciprocal_estimate_v4f32:
48 ; CHECK: movaps
49 ; CHECK-NEXT: divps
50 ; CHECK-NEXT: movaps
51 ; CHECK-NEXT: retq
47 ; NORECIP-LABEL: reciprocal_estimate_v4f32:
48 ; NORECIP: movaps
49 ; NORECIP-NEXT: divps
50 ; NORECIP-NEXT: movaps
51 ; NORECIP-NEXT: retq
5252
5353 ; RECIP-LABEL: reciprocal_estimate_v4f32:
5454 ; RECIP: vrcpps
7575 %div = fdiv fast <8 x float> , %x
7676 ret <8 x float> %div
7777
78 ; CHECK-LABEL: reciprocal_estimate_v8f32:
79 ; CHECK: movaps
80 ; CHECK: movaps
81 ; CHECK-NEXT: divps
82 ; CHECK-NEXT: divps
83 ; CHECK-NEXT: movaps
84 ; CHECK-NEXT: movaps
85 ; CHECK-NEXT: retq
78 ; NORECIP-LABEL: reciprocal_estimate_v8f32:
79 ; NORECIP: movaps
80 ; NORECIP: movaps
81 ; NORECIP-NEXT: divps
82 ; NORECIP-NEXT: divps
83 ; NORECIP-NEXT: movaps
84 ; NORECIP-NEXT: movaps
85 ; NORECIP-NEXT: retq
8686
8787 ; RECIP-LABEL: reciprocal_estimate_v8f32:
8888 ; RECIP: vrcpps
None ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!sqrtf,!vec-sqrtf,!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE
22
33 declare double @__sqrt_finite(double) #0
99
1010
1111 define double @fd(double %d) #0 {
12 ; CHECK-LABEL: fd:
13 ; CHECK: # BB#0:
14 ; CHECK-NEXT: sqrtsd %xmm0, %xmm0
15 ; CHECK-NEXT: retq
12 ; NORECIP-LABEL: fd:
13 ; NORECIP: # BB#0:
14 ; NORECIP-NEXT: sqrtsd %xmm0, %xmm0
15 ; NORECIP-NEXT: retq
1616 ;
1717 ; ESTIMATE-LABEL: fd:
1818 ; ESTIMATE: # BB#0:
2424
2525
2626 define float @ff(float %f) #0 {
27 ; CHECK-LABEL: ff:
28 ; CHECK: # BB#0:
29 ; CHECK-NEXT: sqrtss %xmm0, %xmm0
30 ; CHECK-NEXT: retq
27 ; NORECIP-LABEL: ff:
28 ; NORECIP: # BB#0:
29 ; NORECIP-NEXT: sqrtss %xmm0, %xmm0
30 ; NORECIP-NEXT: retq
3131 ;
3232 ; ESTIMATE-LABEL: ff:
3333 ; ESTIMATE: # BB#0:
4848
4949
5050 define x86_fp80 @fld(x86_fp80 %ld) #0 {
51 ; CHECK-LABEL: fld:
52 ; CHECK: # BB#0:
53 ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
54 ; CHECK-NEXT: fsqrt
55 ; CHECK-NEXT: retq
51 ; NORECIP-LABEL: fld:
52 ; NORECIP: # BB#0:
53 ; NORECIP-NEXT: fldt {{[0-9]+}}(%rsp)
54 ; NORECIP-NEXT: fsqrt
55 ; NORECIP-NEXT: retq
5656 ;
5757 ; ESTIMATE-LABEL: fld:
5858 ; ESTIMATE: # BB#0:
6666
6767
6868 define float @reciprocal_square_root(float %x) #0 {
69 ; CHECK-LABEL: reciprocal_square_root:
70 ; CHECK: # BB#0:
71 ; CHECK-NEXT: sqrtss %xmm0, %xmm1
72 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
73 ; CHECK-NEXT: divss %xmm1, %xmm0
74 ; CHECK-NEXT: retq
69 ; NORECIP-LABEL: reciprocal_square_root:
70 ; NORECIP: # BB#0:
71 ; NORECIP-NEXT: sqrtss %xmm0, %xmm1
72 ; NORECIP-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
73 ; NORECIP-NEXT: divss %xmm1, %xmm0
74 ; NORECIP-NEXT: retq
7575 ;
7676 ; ESTIMATE-LABEL: reciprocal_square_root:
7777 ; ESTIMATE: # BB#0:
8888 }
8989
9090 define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
91 ; CHECK-LABEL: reciprocal_square_root_v4f32:
92 ; CHECK: # BB#0:
93 ; CHECK-NEXT: sqrtps %xmm0, %xmm1
94 ; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
95 ; CHECK-NEXT: divps %xmm1, %xmm0
96 ; CHECK-NEXT: retq
91 ; NORECIP-LABEL: reciprocal_square_root_v4f32:
92 ; NORECIP: # BB#0:
93 ; NORECIP-NEXT: sqrtps %xmm0, %xmm1
94 ; NORECIP-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
95 ; NORECIP-NEXT: divps %xmm1, %xmm0
96 ; NORECIP-NEXT: retq
9797 ;
9898 ; ESTIMATE-LABEL: reciprocal_square_root_v4f32:
9999 ; ESTIMATE: # BB#0:
110110 }
111111
112112 define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 {
113 ; CHECK-LABEL: reciprocal_square_root_v8f32:
114 ; CHECK: # BB#0:
115 ; CHECK-NEXT: sqrtps %xmm1, %xmm2
116 ; CHECK-NEXT: sqrtps %xmm0, %xmm3
117 ; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
118 ; CHECK-NEXT: movaps %xmm1, %xmm0
119 ; CHECK-NEXT: divps %xmm3, %xmm0
120 ; CHECK-NEXT: divps %xmm2, %xmm1
121 ; CHECK-NEXT: retq
113 ; NORECIP-LABEL: reciprocal_square_root_v8f32:
114 ; NORECIP: # BB#0:
115 ; NORECIP-NEXT: sqrtps %xmm1, %xmm2
116 ; NORECIP-NEXT: sqrtps %xmm0, %xmm3
117 ; NORECIP-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
118 ; NORECIP-NEXT: movaps %xmm1, %xmm0
119 ; NORECIP-NEXT: divps %xmm3, %xmm0
120 ; NORECIP-NEXT: divps %xmm2, %xmm1
121 ; NORECIP-NEXT: retq
122122 ;
123123 ; ESTIMATE-LABEL: reciprocal_square_root_v8f32:
124124 ; ESTIMATE: # BB#0: