llvm.org GIT mirror llvm / 7150fbf
AMDGPU: Remove legacy rsq.clamped intrinsic
Mesa still has a use of llvm.AMDGPU.rsq.f64 remaining.
Also fix mismatch with non-IEEE rsq selecting to IEEE rsq.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275617 91177308-0d34-0410-b5e6-96231b3b80d8
Matt Arsenault 4 years ago
12 changed file(s) with 76 addition(s) and 121 deletion(s). Raw diff Collapse all Expand all
5555 Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
5656 GCCBuiltin<"__builtin_r600_rat_store_typed">;
5757
58 def int_r600_rsq : Intrinsic<
59 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
60 >;
61
58 def int_r600_recipsqrt_ieee : Intrinsic<
59 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
60 >;
61
62 def int_r600_recipsqrt_clamped : Intrinsic<
63 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
64 >;
6265
6366 } // End TargetPrefix = "r600"
6467
2424 def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
2525 def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
2626
27 def int_AMDGPU_rsq_clamped : Intrinsic<
28 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
29 >;
30
3127 // Deprecated in favor of llvm.amdgcn.rsq
3228 def int_AMDGPU_rsq : Intrinsic<
3329 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
808808 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
809809 AMDGPU::T0_Z, VT);
810810
811 // FIXME: Should be renamed to r600 prefix
812 case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
811 case Intrinsic::r600_recipsqrt_ieee:
812 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
813
814 case Intrinsic::r600_recipsqrt_clamped:
813815 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
814
815 case Intrinsic::r600_rsq:
816 case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
817 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
818 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
819 }
816 }
817
820818 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
821819 break;
822820 }
11161116 }
11171117
11181118 class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
1119 inst, "RECIPSQRT_IEEE", AMDGPUrsq_legacy
1120 > {
1119 inst, "RECIPSQRT_IEEE", AMDGPUrsq> {
11211120 let Itinerary = TransALU;
11221121 }
11231122
16811681
16821682 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
16831683 }
1684 case Intrinsic::amdgcn_rsq_clamp:
1685 case AMDGPUIntrinsic::AMDGPU_rsq_clamped: { // Legacy name
1684 case Intrinsic::amdgcn_rsq_clamp: {
16861685 if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
16871686 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
16881687
9898 %tmp88 = insertelement <4 x float> %tmp87, float %tmp32, i32 2
9999 %tmp89 = insertelement <4 x float> %tmp88, float 0.000000e+00, i32 3
100100 %tmp90 = call float @llvm.r600.dot4(<4 x float> %tmp85, <4 x float> %tmp89)
101 %tmp91 = call float @llvm.AMDGPU.rsq.clamped.f32(float %tmp90)
101 %tmp91 = call float @llvm.r600.recipsqrt.clamped.f32(float %tmp90)
102102 %tmp92 = fmul float %tmp30, %tmp91
103103 %tmp93 = fmul float %tmp31, %tmp91
104104 %tmp94 = fmul float %tmp32, %tmp91
197197 %tmp181 = fadd float %tmp180, %tmp28
198198 %tmp182 = fdiv float 1.000000e+00, %tmp33
199199 %tmp183 = fmul float %tmp32, %tmp182
200 %tmp184 = call float @fabs(float %tmp183)
200 %tmp184 = call float @llvm.fabs.f32(float %tmp183)
201201 %tmp185 = fmul float %tmp176, 0x3FD99999A0000000
202202 %tmp186 = fadd float %tmp185, 0x3FAEB851E0000000
203203 %tmp187 = fmul float %tmp177, 0x3FE3333340000000
349349 %tmp329 = insertelement <4 x float> %tmp328, float %tmp322, i32 2
350350 %tmp330 = insertelement <4 x float> %tmp329, float 0.000000e+00, i32 3
351351 %tmp331 = call float @llvm.r600.dot4(<4 x float> %tmp326, <4 x float> %tmp330)
352 %tmp332 = call float @llvm.AMDGPU.rsq.clamped.f32(float %tmp331)
352 %tmp332 = call float @llvm.r600.recipsqrt.clamped.f32(float %tmp331)
353353 %tmp333 = fmul float %tmp318, %tmp332
354354 %tmp334 = fmul float %tmp320, %tmp332
355355 %tmp335 = fmul float %tmp322, %tmp332
382382 %tmp362 = insertelement <4 x float> %tmp361, float %tmp45, i32 2
383383 %tmp363 = insertelement <4 x float> %tmp362, float 0.000000e+00, i32 3
384384 %tmp364 = call float @llvm.r600.dot4(<4 x float> %tmp359, <4 x float> %tmp363)
385 %tmp365 = call float @llvm.AMDGPU.rsq.clamped.f32(float %tmp364)
385 %tmp365 = call float @llvm.r600.recipsqrt.clamped.f32(float %tmp364)
386386 %tmp366 = fmul float %tmp45, %tmp365
387 %tmp367 = call float @fabs(float %tmp366)
387 %tmp367 = call float @llvm.fabs.f32(float %tmp366)
388388 %tmp368 = fmul float %tmp178, 0x3FECCCCCC0000000
389389 %tmp369 = fadd float %tmp368, %tmp367
390390 %tmp370 = fadd float %tmp369, 0xBFEFAE1480000000
408408 %tmp388 = insertelement <4 x float> %tmp387, float %tmp45, i32 2
409409 %tmp389 = insertelement <4 x float> %tmp388, float 0.000000e+00, i32 3
410410 %tmp390 = call float @llvm.r600.dot4(<4 x float> %tmp385, <4 x float> %tmp389)
411 %tmp391 = call float @llvm.AMDGPU.rsq.clamped.f32(float %tmp390)
411 %tmp391 = call float @llvm.r600.recipsqrt.clamped.f32(float %tmp390)
412412 %tmp392 = fmul float %tmp45, %tmp391
413 %tmp393 = call float @fabs(float %tmp392)
413 %tmp393 = call float @llvm.fabs.f32(float %tmp392)
414414 %tmp394 = fmul float %tmp178, 0x3FF51EB860000000
415415 %tmp395 = fadd float %tmp394, %tmp393
416416 %tmp396 = fadd float %tmp395, 0xBFEFAE1480000000
11491149 %tmp875 = insertelement <4 x float> %tmp874, float %tmp45, i32 2
11501150 %tmp876 = insertelement <4 x float> %tmp875, float 0.000000e+00, i32 3
11511151 %tmp877 = call float @llvm.r600.dot4(<4 x float> %tmp872, <4 x float> %tmp876)
1152 %tmp878 = call float @llvm.AMDGPU.rsq.clamped.f32(float %tmp877)
1152 %tmp878 = call float @llvm.r600.recipsqrt.clamped.f32(float %tmp877)
11531153 %tmp879 = fmul float %tmp45, %tmp878
1154 %tmp880 = call float @fabs(float %tmp879)
1154 %tmp880 = call float @llvm.fabs.f32(float %tmp879)
11551155 %tmp881 = fmul float %tmp178, 0x3FECCCCCC0000000
11561156 %tmp882 = fadd float %tmp881, %tmp880
11571157 %tmp883 = fadd float %tmp882, 0xBFEFAE1480000000
12911291 declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #0
12921292
12931293 ; Function Attrs: nounwind readnone
1294 declare float @llvm.AMDGPU.rsq.clamped.f32(float) #0
1294 declare float @llvm.r600.recipsqrt.clamped.f32(float) #0
12951295
12961296 ; Function Attrs: nounwind readonly
1297 declare float @fabs(float) #1
1297 declare float @llvm.fabs.f32(float) #1
12981298
12991299 ; Function Attrs: nounwind readnone
13001300 declare float @llvm.exp2.f32(float) #0
+0
-21
test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll less more
0 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
2
3 declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
4
5 ; FUNC-LABEL: {{^}}rsq_clamped_f64:
6 ; SI: v_rsq_clamp_f64_e32
7
8 ; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}]
9 ; TODO: this constant should be folded:
10 ; VI-DAG: s_mov_b32 s[[LOW1:[0-9+]]], -1
11 ; VI-DAG: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
12 ; VI-DAG: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
13 ; VI-DAG: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
14 ; VI-DAG: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW1]]:[[HIGH2]]]
15
16 define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
17 %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
18 store double %rsq_clamped, double addrspace(1)* %out, align 8
19 ret void
20 }
+0
-25
test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll less more
0 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3
4 ; FIXME: Uses of this should be moved to llvm.amdgcn.rsq.clamped, and
5 ; an r600 variant added.
6
7 declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
8
9 ; FUNC-LABEL: {{^}}rsq_clamped_f32:
10 ; SI: v_rsq_clamp_f32_e32
11
12 ; VI-DAG: v_rsq_f32_e32 [[RSQ:v[0-9]+]], {{s[0-9]+}}
13 ; VI-DAG: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
14 ; TODO: this constant should be folded:
15 ; VI-DAG: v_mov_b32_e32 [[MINFLT:v[0-9]+]], 0xff7fffff
16 ; VI: v_max_f32_e32 {{v[0-9]+}}, [[MIN]], [[MINFLT]]
17
18 ; EG: RECIPSQRT_CLAMPED
19
20 define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
21 %rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone
22 store float %rsq_clamped, float addrspace(1)* %out, align 4
23 ret void
24 }
+0
-33
test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll less more
0 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3
4 declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
5
6 ; FUNC-LABEL: {{^}}rsq_f32:
7 ; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
8 ; EG: RECIPSQRT_IEEE
9 define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
10 %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
11 store float %rsq, float addrspace(1)* %out, align 4
12 ret void
13 }
14
15 ; TODO: Really these should be constant folded
16 ; FUNC-LABEL: {{^}}rsq_f32_constant_4.0
17 ; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
18 ; EG: RECIPSQRT_IEEE
19 define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
20 %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
21 store float %rsq, float addrspace(1)* %out, align 4
22 ret void
23 }
24
25 ; FUNC-LABEL: {{^}}rsq_f32_constant_100.0
26 ; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
27 ; EG: RECIPSQRT_IEEE
28 define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
29 %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
30 store float %rsq, float addrspace(1)* %out, align 4
31 ret void
32 }
0 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
1
2 declare float @llvm.r600.recipsqrt.clamped.f32(float) nounwind readnone
3
4 ; EG-LABEL: {{^}}rsq_clamped_f32:
5 ; EG: RECIPSQRT_CLAMPED
6 define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
7 %rsq_clamped = call float @llvm.r600.recipsqrt.clamped.f32(float %src)
8 store float %rsq_clamped, float addrspace(1)* %out, align 4
9 ret void
10 }
0 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
1
2 declare float @llvm.r600.recipsqrt.ieee.f32(float) nounwind readnone
3
4 ; EG-LABEL: {{^}}recipsqrt.ieee_f32:
5 ; EG: RECIPSQRT_IEEE
6 define void @recipsqrt.ieee_f32(float addrspace(1)* %out, float %src) nounwind {
7 %recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float %src) nounwind readnone
8 store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
9 ret void
10 }
11
12 ; TODO: Really these should be constant folded
13 ; EG-LABEL: {{^}}recipsqrt.ieee_f32_constant_4.0
14 ; EG: RECIPSQRT_IEEE
15 define void @recipsqrt.ieee_f32_constant_4.0(float addrspace(1)* %out) nounwind {
16 %recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float 4.0) nounwind readnone
17 store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
18 ret void
19 }
20
21 ; EG-LABEL: {{^}}recipsqrt.ieee_f32_constant_100.0
22 ; EG: RECIPSQRT_IEEE
23 define void @recipsqrt.ieee_f32_constant_100.0(float addrspace(1)* %out) nounwind {
24 %recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float 100.0) nounwind readnone
25 store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
26 ret void
27 }
101101 %94 = insertelement <4 x float> %93, float %6, i32 2
102102 %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
103103 %96 = call float @llvm.r600.dot4(<4 x float> %91, <4 x float> %95)
104 %97 = call float @fabs(float %96)
105 %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97)
104 %97 = call float @llvm.fabs.f32(float %96)
105 %98 = call float @llvm.r600.recipsqrt.clamped.f32(float %97)
106106 %99 = fmul float %4, %98
107107 %100 = fmul float %5, %98
108108 %101 = fmul float %6, %98
221221 declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
222222
223223 ; Function Attrs: readonly
224 declare float @fabs(float) #2
224 declare float @llvm.fabs.f32(float) #1
225225
226226 ; Function Attrs: readnone
227 declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
227 declare float @llvm.r600.recipsqrt.clamped.f32(float) #1
228228
229229 ; Function Attrs: readnone
230230 declare float @llvm.AMDGPU.clamp.f32(float, float, float) #1
231231
232232 ; Function Attrs: nounwind readonly
233 declare float @llvm.pow.f32(float, float) #3
234
235 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
236
237 attributes #1 = { readnone }
238 attributes #2 = { readonly }
239 attributes #3 = { nounwind readonly }
233 declare float @llvm.pow.f32(float, float) #2
234
235 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) #3
236
237 attributes #1 = { nounwind readnone }
238 attributes #2 = { nounwind readonly }
239 attributes #3 = { nounwind }