commit 99f07c2 — llvm.org GIT mirror (llvm)

Revert "[DAGCombiner] try repeated fdiv divisor transform before building estimate"

This reverts commit fb9a5307a94e6f1f850e4d89f79103b123f16279 (rL359398) because it can cause an infinite loop due to opposing combines.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359695 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Sanjay Patel
2 files changed, 42 insertions(+), 30 deletions(-)
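For context: combineRepeatedFPDivisors rewrites divisions that share a divisor (or a vector fdiv whose divisor is a splat) into a single reciprocal plus multiplies, when reciprocal fast-math is allowed. A minimal sketch of the shared-divisor case in LLVM IR (hypothetical example, not from this commit; the function and value names are made up):

; Hypothetical: two fdivs by the same %d, eligible for combineRepeatedFPDivisors.
define <4 x float> @repeated_divisor(<4 x float> %a, <4 x float> %b, <4 x float> %d) {
  %q1 = fdiv arcp <4 x float> %a, %d
  %q2 = fdiv arcp <4 x float> %b, %d
  %s = fadd <4 x float> %q1, %q2
  ret <4 x float> %s
}
; After the combine, conceptually:
;   %r  = fdiv arcp <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %d
;   %q1 = fmul arcp <4 x float> %a, %r
;   %q2 = fmul arcp <4 x float> %b, %r

Per the commit message, running this transform first could fight an opposing combine on the reciprocal it creates, so the two rewrites could undo each other indefinitely; the revert restores the original ordering.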
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11992,9 +11992,6 @@
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (SDValue V = combineRepeatedFPDivisors(N))
-    return V;
-
   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
     if (N1CFP) {
@@ -12083,6 +12080,9 @@
                        Flags);
     }
   }
+
+  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
+    return CombineRepeatedDivisors;
 
   return SDValue();
 }
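The functional change is ordering only: combineRepeatedFPDivisors again runs at the end of visitFDIV, so the reciprocal-estimate lowering gets first look at an fdiv. The test diffs below (the function names suggest test/CodeGen/X86/fdiv-combine-vec.ll) show the consequence: instead of one scalar divss/vdivss followed by a splat, the splat happens first and the division is expanded with rcpps plus one Newton-Raphson refinement step. With e0 = rcpps(d), the refined reciprocal is

  e1 = e0 + e0 * (1 - d * e0)

which maps line by line onto the new CHECK lines: mulps computes d*e0, subps computes 1 - d*e0, mulps computes e0*(1 - d*e0), and addps adds e0 back in.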
--- a/test/CodeGen/X86/fdiv-combine-vec.ll
+++ b/test/CodeGen/X86/fdiv-combine-vec.ll
@@ -50,17 +50,25 @@
 define <4 x float> @splat_fdiv_v4f32(<4 x float> %x, float %y) {
 ; SSE-LABEL: splat_fdiv_v4f32:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-NEXT:    divss %xmm1, %xmm2
-; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; SSE-NEXT:    mulps %xmm2, %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; SSE-NEXT:    rcpps %xmm1, %xmm2
+; SSE-NEXT:    mulps %xmm2, %xmm1
+; SSE-NEXT:    movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SSE-NEXT:    subps %xmm1, %xmm3
+; SSE-NEXT:    mulps %xmm2, %xmm3
+; SSE-NEXT:    addps %xmm2, %xmm3
+; SSE-NEXT:    mulps %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: splat_fdiv_v4f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-NEXT:    vdivss %xmm1, %xmm2, %xmm1
 ; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; AVX-NEXT:    vrcpps %xmm1, %xmm2
+; AVX-NEXT:    vmulps %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX-NEXT:    vsubps %xmm1, %xmm3, %xmm1
+; AVX-NEXT:    vmulps %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vaddps %xmm1, %xmm2, %xmm1
 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %vy = insertelement <4 x float> undef, float %y, i32 0
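The trailing context line above is the start of the test body, which the hunk cuts off. A reduced sketch of the splat-divisor pattern these functions exercise (hypothetical reconstruction; only the %vy line is visible in the diff, so the shuffle, flags, and remaining names are assumptions):

; Hypothetical reduced test body: divide a vector by a splat of a scalar.
define <4 x float> @splat_fdiv(<4 x float> %x, float %y) {
  %vy = insertelement <4 x float> undef, float %y, i32 0
  %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
  %r = fdiv fast <4 x float> %x, %splaty   ; fast flags permit the reciprocal rewrite
  ret <4 x float> %r
}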
@@ -81,10 +89,14 @@
 ;
 ; AVX-LABEL: splat_fdiv_v8f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-NEXT:    vdivss %xmm1, %xmm2, %xmm1
 ; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX-NEXT:    vrcpps %ymm1, %ymm2
+; AVX-NEXT:    vmulps %ymm2, %ymm1, %ymm1
+; AVX-NEXT:    vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX-NEXT:    vsubps %ymm1, %ymm3, %ymm1
+; AVX-NEXT:    vmulps %ymm1, %ymm2, %ymm1
+; AVX-NEXT:    vaddps %ymm1, %ymm2, %ymm1
 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %vy = insertelement <8 x float> undef, float %y, i32 0
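In the v8f32 case the splat is widened to 256 bits with vinsertf128 before the estimate runs, so the whole refinement happens on ymm registers. To reproduce these checks, RUN lines along the following lines would drive the SSE and AVX prefixes (hypothetical; the test's actual RUN lines sit above the hunks shown here):

; Hypothetical RUN lines matching the SSE/AVX check prefixes.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX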
@@ -96,25 +108,25 @@
 define <4 x float> @splat_fdiv_v4f32_estimate(<4 x float> %x, float %y) #0 {
 ; SSE-LABEL: splat_fdiv_v4f32_estimate:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    rcpss %xmm1, %xmm2
-; SSE-NEXT:    mulss %xmm2, %xmm1
-; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE-NEXT:    subss %xmm1, %xmm3
-; SSE-NEXT:    mulss %xmm2, %xmm3
-; SSE-NEXT:    addss %xmm2, %xmm3
-; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,0,0,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; SSE-NEXT:    rcpps %xmm1, %xmm2
+; SSE-NEXT:    mulps %xmm2, %xmm1
+; SSE-NEXT:    movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SSE-NEXT:    subps %xmm1, %xmm3
+; SSE-NEXT:    mulps %xmm2, %xmm3
+; SSE-NEXT:    addps %xmm2, %xmm3
 ; SSE-NEXT:    mulps %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: splat_fdiv_v4f32_estimate:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vrcpss %xmm1, %xmm1, %xmm2
-; AVX-NEXT:    vmulss %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm1
-; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
-; AVX-NEXT:    vaddss %xmm1, %xmm2, %xmm1
 ; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; AVX-NEXT:    vrcpps %xmm1, %xmm2
+; AVX-NEXT:    vmulps %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX-NEXT:    vsubps %xmm1, %xmm3, %xmm1
+; AVX-NEXT:    vmulps %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vaddps %xmm1, %xmm2, %xmm1
 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %vy = insertelement <4 x float> undef, float %y, i32 0
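The *_estimate functions carry attribute group #0, which opts them into reciprocal-estimate codegen even for the scalar division; that is why the old output used the scalar rcpss/mulss refinement before splatting, while the reverted output splats first and refines with rcpps. The group is defined outside this hunk; a plausible form, using LLVM's real "reciprocal-estimates" function attribute (the exact value here is an assumption):

; Hypothetical attribute group for the *_estimate functions; value assumed.
attributes #0 = { "reciprocal-estimates"="all" }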
@@ -139,14 +151,14 @@
 ;
 ; AVX-LABEL: splat_fdiv_v8f32_estimate:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vrcpss %xmm1, %xmm1, %xmm2
-; AVX-NEXT:    vmulss %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm1
-; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
-; AVX-NEXT:    vaddss %xmm1, %xmm2, %xmm1
 ; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX-NEXT:    vrcpps %ymm1, %ymm2
+; AVX-NEXT:    vmulps %ymm2, %ymm1, %ymm1
+; AVX-NEXT:    vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX-NEXT:    vsubps %ymm1, %ymm3, %ymm1
+; AVX-NEXT:    vmulps %ymm1, %ymm2, %ymm1
+; AVX-NEXT:    vaddps %ymm1, %ymm2, %ymm1
 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %vy = insertelement <8 x float> undef, float %y, i32 0