llvm.org GIT mirror llvm / dc0ca89
use update_llc_test_checks.py to tighten checking; test features, not CPUs; remove unnecessary cruft. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234622 91177308-0d34-0410-b5e6-96231b3b80d8 (Sanjay Patel, 4 years ago)
1 changed file(s) with 105 addition(s) and 96 deletion(s). Raw diff Collapse all Expand all
None ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
0 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-sqrt-est | FileCheck %s --check-prefix=ESTIMATE
22
3 ; generated using "clang -S -O2 -ffast-math -emit-llvm sqrt.c" from
4 ; #include <math.h>
5 ;
6 ; double fd(double d){
7 ; return sqrt(d);
8 ; }
9 ;
10 ; float ff(float f){
11 ; return sqrtf(f);
12 ; }
13 ;
14 ; long double fld(long double ld){
15 ; return sqrtl(ld);
16 ; }
3 declare double @__sqrt_finite(double) #0
4 declare float @__sqrtf_finite(float) #0
5 declare x86_fp80 @__sqrtl_finite(x86_fp80) #0
6 declare float @llvm.sqrt.f32(float) #0
7 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
8 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
9
10
11 define double @fd(double %d) #0 {
12 ; CHECK-LABEL: fd:
13 ; CHECK: # BB#0:
14 ; CHECK-NEXT: sqrtsd %xmm0, %xmm0
15 ; CHECK-NEXT: retq
1716 ;
18 ; Tests conversion of sqrt function calls into sqrt instructions when
19 ; -ffast-math is in effect.
20
21 ; ModuleID = 'sqrt.c'
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
23 target triple = "x86_64-unknown-linux-gnu"
24
25 ; Function Attrs: nounwind readnone uwtable
26 define double @fd(double %d) #0 {
27 entry:
28 ; CHECK: sqrtsd
29 %call = tail call double @__sqrt_finite(double %d) #2
17 ; ESTIMATE-LABEL: fd:
18 ; ESTIMATE: # BB#0:
19 ; ESTIMATE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
20 ; ESTIMATE-NEXT: retq
21 %call = tail call double @__sqrt_finite(double %d) #1
3022 ret double %call
3123 }
3224
33 ; Function Attrs: nounwind readnone
34 declare double @__sqrt_finite(double) #1
3525
36 ; Function Attrs: nounwind readnone uwtable
3726 define float @ff(float %f) #0 {
38 entry:
39 ; CHECK: sqrtss
40 %call = tail call float @__sqrtf_finite(float %f) #2
27 ; CHECK-LABEL: ff:
28 ; CHECK: # BB#0:
29 ; CHECK-NEXT: sqrtss %xmm0, %xmm0
30 ; CHECK-NEXT: retq
31 ;
32 ; ESTIMATE-LABEL: ff:
33 ; ESTIMATE: # BB#0:
34 ; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
35 ; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
36 ; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
37 ; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm1
38 ; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
39 ; ESTIMATE-NEXT: vmulss %xmm2, %xmm1, %xmm1
40 ; ESTIMATE-NEXT: vmulss %xmm1, %xmm0, %xmm1
41 ; ESTIMATE-NEXT: vxorps %xmm2, %xmm2, %xmm2
42 ; ESTIMATE-NEXT: vcmpeqss %xmm2, %xmm0, %xmm0
43 ; ESTIMATE-NEXT: vandnps %xmm1, %xmm0, %xmm0
44 ; ESTIMATE-NEXT: retq
45 %call = tail call float @__sqrtf_finite(float %f) #1
4146 ret float %call
4247 }
4348
44 ; Function Attrs: nounwind readnone
45 declare float @__sqrtf_finite(float) #1
4649
47 ; Function Attrs: nounwind readnone uwtable
4850 define x86_fp80 @fld(x86_fp80 %ld) #0 {
49 entry:
50 ; CHECK: fsqrt
51 %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
51 ; CHECK-LABEL: fld:
52 ; CHECK: # BB#0:
53 ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
54 ; CHECK-NEXT: fsqrt
55 ; CHECK-NEXT: retq
56 ;
57 ; ESTIMATE-LABEL: fld:
58 ; ESTIMATE: # BB#0:
59 ; ESTIMATE-NEXT: fldt {{[0-9]+}}(%rsp)
60 ; ESTIMATE-NEXT: fsqrt
61 ; ESTIMATE-NEXT: retq
62 %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #1
5263 ret x86_fp80 %call
5364 }
5465
55 declare x86_fp80 @__sqrtl_finite(x86_fp80) #1
5666
57 declare float @llvm.sqrt.f32(float) #1
58 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1
59 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #1
60
61 ; If the target's sqrtss and divss instructions are substantially
62 ; slower than rsqrtss with a Newton-Raphson refinement, we should
63 ; generate the estimate sequence.
6467
6568 define float @reciprocal_square_root(float %x) #0 {
69 ; CHECK-LABEL: reciprocal_square_root:
70 ; CHECK: # BB#0:
71 ; CHECK-NEXT: sqrtss %xmm0, %xmm1
72 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
73 ; CHECK-NEXT: divss %xmm1, %xmm0
74 ; CHECK-NEXT: retq
75 ;
76 ; ESTIMATE-LABEL: reciprocal_square_root:
77 ; ESTIMATE: # BB#0:
78 ; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
79 ; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
80 ; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
81 ; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm0
82 ; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
83 ; ESTIMATE-NEXT: vmulss %xmm2, %xmm0, %xmm0
84 ; ESTIMATE-NEXT: retq
6685 %sqrt = tail call float @llvm.sqrt.f32(float %x)
6786 %div = fdiv fast float 1.0, %sqrt
6887 ret float %div
69
70 ; CHECK-LABEL: reciprocal_square_root:
71 ; CHECK: sqrtss
72 ; CHECK-NEXT: movss
73 ; CHECK-NEXT: divss
74 ; CHECK-NEXT: retq
75 ; BTVER2-LABEL: reciprocal_square_root:
76 ; BTVER2: vrsqrtss
77 ; BTVER2-NEXT: vmulss
78 ; BTVER2-NEXT: vmulss
79 ; BTVER2-NEXT: vmulss
80 ; BTVER2-NEXT: vaddss
81 ; BTVER2-NEXT: vmulss
82 ; BTVER2-NEXT: retq
8388 }
8489
8590 define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
91 ; CHECK-LABEL: reciprocal_square_root_v4f32:
92 ; CHECK: # BB#0:
93 ; CHECK-NEXT: sqrtps %xmm0, %xmm1
94 ; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
95 ; CHECK-NEXT: divps %xmm1, %xmm0
96 ; CHECK-NEXT: retq
97 ;
98 ; ESTIMATE-LABEL: reciprocal_square_root_v4f32:
99 ; ESTIMATE: # BB#0:
100 ; ESTIMATE-NEXT: vrsqrtps %xmm0, %xmm1
101 ; ESTIMATE-NEXT: vmulps %xmm1, %xmm1, %xmm2
102 ; ESTIMATE-NEXT: vmulps %xmm0, %xmm2, %xmm0
103 ; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
104 ; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %xmm1, %xmm1
105 ; ESTIMATE-NEXT: vmulps %xmm1, %xmm0, %xmm0
106 ; ESTIMATE-NEXT: retq
86107 %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
87108 %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
88109 ret <4 x float> %div
89
90 ; CHECK-LABEL: reciprocal_square_root_v4f32:
91 ; CHECK: sqrtps
92 ; CHECK-NEXT: movaps
93 ; CHECK-NEXT: divps
94 ; CHECK-NEXT: retq
95 ; BTVER2-LABEL: reciprocal_square_root_v4f32:
96 ; BTVER2: vrsqrtps
97 ; BTVER2-NEXT: vmulps
98 ; BTVER2-NEXT: vmulps
99 ; BTVER2-NEXT: vmulps
100 ; BTVER2-NEXT: vaddps
101 ; BTVER2-NEXT: vmulps
102 ; BTVER2-NEXT: retq
103110 }
104111
105112 define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 {
113 ; CHECK-LABEL: reciprocal_square_root_v8f32:
114 ; CHECK: # BB#0:
115 ; CHECK-NEXT: sqrtps %xmm1, %xmm2
116 ; CHECK-NEXT: sqrtps %xmm0, %xmm3
117 ; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
118 ; CHECK-NEXT: movaps %xmm1, %xmm0
119 ; CHECK-NEXT: divps %xmm3, %xmm0
120 ; CHECK-NEXT: divps %xmm2, %xmm1
121 ; CHECK-NEXT: retq
122 ;
123 ; ESTIMATE-LABEL: reciprocal_square_root_v8f32:
124 ; ESTIMATE: # BB#0:
125 ; ESTIMATE-NEXT: vrsqrtps %ymm0, %ymm1
126 ; ESTIMATE-NEXT: vmulps %ymm1, %ymm1, %ymm2
127 ; ESTIMATE-NEXT: vmulps %ymm0, %ymm2, %ymm0
128 ; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0
129 ; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
130 ; ESTIMATE-NEXT: vmulps %ymm1, %ymm0, %ymm0
131 ; ESTIMATE-NEXT: retq
106132 %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
107133 %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
108134 ret <8 x float> %div
109
110 ; CHECK-LABEL: reciprocal_square_root_v8f32:
111 ; CHECK: sqrtps
112 ; CHECK-NEXT: sqrtps
113 ; CHECK-NEXT: movaps
114 ; CHECK-NEXT: movaps
115 ; CHECK-NEXT: divps
116 ; CHECK-NEXT: divps
117 ; CHECK-NEXT: retq
118 ; BTVER2-LABEL: reciprocal_square_root_v8f32:
119 ; BTVER2: vrsqrtps
120 ; BTVER2-NEXT: vmulps
121 ; BTVER2-NEXT: vmulps
122 ; BTVER2-NEXT: vmulps
123 ; BTVER2-NEXT: vaddps
124 ; BTVER2-NEXT: vmulps
125 ; BTVER2-NEXT: retq
126135 }
127136
128137
129 attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
130 attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
131 attributes #2 = { nounwind readnone }
138 attributes #0 = { "unsafe-fp-math"="true" }
139 attributes #1 = { nounwind readnone }
140