llvm.org GIT mirror llvm / 89d9794
Merging r182394: ------------------------------------------------------------------------ r182394 | jholewinski | 2013-05-21 09:51:30 -0700 (Tue, 21 May 2013) | 1 line [NVPTX] Add @llvm.nvvm.sqrt.f() intrinsic ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_33@182829 91177308-0d34-0410-b5e6-96231b3b80d8 Bill Wendling 7 years ago
6 changed file(s) with 33 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
404404 // Sqrt
405405 //
406406
407 def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">,
408 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
407409 def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
408410 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
409411 def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
4141 " IEEE Compliant F32 div.rnd if avaiable."),
4242 cl::init(2));
4343
44 static cl::opt<bool>
45 UsePrecSqrtF32("nvptx-prec-sqrtf32",
46 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
47 cl::init(true));
48
4449 /// createNVPTXISelDag - This pass converts a legalized DAG into a
4550 /// NVPTX-specific DAG, ready for instruction scheduling.
4651 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
7378
7479 // Decide how to translate f32 div
7580 do_DIVF32_PREC = UsePrecDivF32;
81 // Decide how to translate f32 sqrt
82 do_SQRTF32_PREC = UsePrecSqrtF32;
7683 // sm less than sm_20 does not support div.rnd. Use div.full.
7784 if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
7885 do_DIVF32_PREC = 1;
3939 // 2: For sm_20 and later, ieee-compliant div.rnd.f32 can be generated;
4040 // Otherwise, use div.full
4141 int do_DIVF32_PREC;
42
43 // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ
44 // is true, then generate the corresponding FTZ version.
45 bool do_SQRTF32_PREC;
4246
4347 // If true, add .ftz to f32 instructions.
4448 // This is only meaningful for sm_20 and later, as the default
7373
7474 def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
7575 def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
76
77 def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">;
78 def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">;
7679
7780 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
7881
510510 Float64Regs, int_nvvm_sqrt_rm_d>;
511511 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
512512 Float64Regs, int_nvvm_sqrt_rp_d>;
513
514 // nvvm_sqrt intrinsic
515 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
516 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
517 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
518 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
519 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
520 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
521 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
522 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
513523
514524 //
515525 // Rsqrt
1414 ret double %x
1515 }
1616
17 define float @test_nvvm_sqrt(float %a) {
18 %val = call float @llvm.nvvm.sqrt.f(float %a)
19 ret float %val
20 }
21
22
1723 declare float @llvm.fabs.f32(float)
1824 declare double @llvm.fabs.f64(double)
25 declare float @llvm.nvvm.sqrt.f(float)