llvm.org GIT mirror llvm / 4ac0136
Merging r200743:
------------------------------------------------------------------------
r200743 | michel.daenzer | 2014-02-03 23:12:38 -0800 (Mon, 03 Feb 2014) | 11 lines

R600/SI: Fix fneg for 0.0

V_ADD_F32 with source modifier does not produce -0.0 for this. Just
manipulate the sign bit directly instead.

Also add a pattern for (fneg (fabs ...)).

Fixes a bunch of bit encoding piglit tests with radeonsi.

Reviewed-by: Tom Stellard <thomas.stellard@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@204643 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard 6 years ago
4 changed file(s) with 87 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
16671667 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
16681668 >;
16691669
1670 /********** ================================ **********/
1671 /********** Floating point absolute/negative **********/
1672 /********** ================================ **********/
1673
1674 // Manipulate the sign bit directly, as e.g. using the source negation modifier
1675 // in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
1676 // breaking the piglit *s-floatBitsToInt-neg* tests
1677
1678 // TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
1679 // removing these patterns
1680
1681 def : Pat <
1682 (fneg (fabs f32:$src)),
1683 (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
1684 >;
1685
16701686 def : Pat <
16711687 (fabs f32:$src),
1672 (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
1673 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
1688 (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */
16741689 >;
16751690
16761691 def : Pat <
16771692 (fneg f32:$src),
1678 (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
1679 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
1693 (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */
16801694 >;
16811695
16821696 /********** ================== **********/
88 ; R600-CHECK-NOT: AND
99 ; R600-CHECK: |PV.{{[XYZW]}}|
1010 ; SI-CHECK-LABEL: @fabs_free
11 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
11 ; SI-CHECK: V_AND_B32
1212
1313 define void @fabs_free(float addrspace(1)* %out, i32 %in) {
1414 entry:
2222 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
2323 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
2424 ; SI-CHECK-LABEL: @fabs_v2
25 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
26 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
25 ; SI-CHECK: V_AND_B32
26 ; SI-CHECK: V_AND_B32
2727 define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
2828 entry:
2929 %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
3737 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
3838 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
3939 ; SI-CHECK-LABEL: @fabs_v4
40 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
41 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
42 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
43 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
40 ; SI-CHECK: V_AND_B32
41 ; SI-CHECK: V_AND_B32
42 ; SI-CHECK: V_AND_B32
43 ; SI-CHECK: V_AND_B32
4444 define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
4545 entry:
4646 %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
1 ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
2
3 ; DAGCombiner will transform:
4 ; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
5 ; unless isFAbsFree returns true
6
7 ; R600-CHECK-LABEL: @fneg_fabs_free
8 ; R600-CHECK-NOT: AND
9 ; R600-CHECK: |PV.{{[XYZW]}}|
10 ; R600-CHECK: -PV
11 ; SI-CHECK-LABEL: @fneg_fabs_free
12 ; SI-CHECK: V_OR_B32
13
14 define void @fneg_fabs_free(float addrspace(1)* %out, i32 %in) {
15 entry:
16 %0 = bitcast i32 %in to float
17 %1 = call float @fabs(float %0)
18 %2 = fsub float -0.000000e+00, %1
19 store float %2, float addrspace(1)* %out
20 ret void
21 }
22
23 ; R600-CHECK-LABEL: @fneg_fabs_v2
24 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
25 ; R600-CHECK: -PV
26 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
27 ; R600-CHECK: -PV
28 ; SI-CHECK-LABEL: @fneg_fabs_v2
29 ; SI-CHECK: V_OR_B32
30 ; SI-CHECK: V_OR_B32
31 define void @fneg_fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
32 entry:
33 %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
34 %1 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %0
35 store <2 x float> %1, <2 x float> addrspace(1)* %out
36 ret void
37 }
38
39 ; SI-CHECK-LABEL: @fneg_fabs_v4
40 ; SI-CHECK: V_OR_B32
41 ; SI-CHECK: V_OR_B32
42 ; SI-CHECK: V_OR_B32
43 ; SI-CHECK: V_OR_B32
44 define void @fneg_fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
45 entry:
46 %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
47 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
48 store <4 x float> %1, <4 x float> addrspace(1)* %out
49 ret void
50 }
51
52 declare float @fabs(float ) readnone
53 declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
54 declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
33 ; R600-CHECK-LABEL: @fneg
44 ; R600-CHECK: -PV
55 ; SI-CHECK-LABEL: @fneg
6 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
6 ; SI-CHECK: V_XOR_B32
77 define void @fneg(float addrspace(1)* %out, float %in) {
88 entry:
99 %0 = fsub float -0.000000e+00, %in
1515 ; R600-CHECK: -PV
1616 ; R600-CHECK: -PV
1717 ; SI-CHECK-LABEL: @fneg_v2
18 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
19 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
18 ; SI-CHECK: V_XOR_B32
19 ; SI-CHECK: V_XOR_B32
2020 define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
2121 entry:
2222 %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
3030 ; R600-CHECK: -PV
3131 ; R600-CHECK: -PV
3232 ; SI-CHECK-LABEL: @fneg_v4
33 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
34 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
35 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
36 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
33 ; SI-CHECK: V_XOR_B32
34 ; SI-CHECK: V_XOR_B32
35 ; SI-CHECK: V_XOR_B32
36 ; SI-CHECK: V_XOR_B32
3737 define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
3838 entry:
3939 %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in