llvm.org GIT mirror llvm / 5ee5a7c
[NVPTX] Lower loads from global constants using ld.global.nc (aka LDG). Summary: After D43914, loads from global variables in addrspace(1) happen with ld.global. But since they're constants, even better would be to use ld.global.nc, aka ldg. Reviewers: tra Subscribers: jholewinski, sanjoy, hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D43915 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326390 91177308-0d34-0410-b5e6-96231b3b80d8 Justin Lebar 2 years ago
2 changed file(s) with 49 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
986986 // We have two ways of identifying invariant loads: Loads may be explicitly
987987 // marked as invariant, or we may infer them to be invariant.
988988 //
989 // We currently infer invariance only for kernel function pointer params that
990 // are noalias (i.e. __restrict) and never written to.
989 // We currently infer invariance for loads from
990 // - constant global variables, and
991 // - kernel function pointer params that are noalias (i.e. __restrict) and
992 // never written to.
991993 //
992994 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
993995 // not during the SelectionDAG phase).
10011003 if (N->isInvariant())
10021004 return true;
10031005
1004 // Load wasn't explicitly invariant. Attempt to infer invariance.
1005 if (!isKernelFunction(F->getFunction()))
1006 return false;
1007
1008 // We use GetUnderlyingObjects() here instead of
1009 // GetUnderlyingObject() mainly because the former looks through phi
1010 // nodes while the latter does not. We need to look through phi
1011 // nodes to handle pointer induction variables.
1006 bool IsKernelFn = isKernelFunction(F->getFunction());
1007
1008 // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
1009 // because the former looks through phi nodes while the latter does not. We
1010 // need to look through phi nodes to handle pointer induction variables.
10121011 SmallVector<Value *, 8> Objs;
10131012 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
10141013 Objs, F->getDataLayout());
1015 for (Value *Obj : Objs) {
1016 auto *A = dyn_cast<const Argument>(Obj);
1017 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
1018 }
1019
1020 return true;
1014
1015 return all_of(Objs, [&](Value *V) {
1016 if (auto *A = dyn_cast<const Argument>(V))
1017 return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
1018 if (auto *GV = dyn_cast<const GlobalVariable>(V))
1019 return GV->isConstant();
1020 return false;
1021 });
10211022 }
10221023
10231024 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
16311632 switch (N->getOpcode()) {
16321633 default:
16331634 return false;
1635 case ISD::LOAD:
16341636 case ISD::INTRINSIC_W_CHAIN:
16351637 if (IsLDG)
16361638 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
16531655 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
16541656 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
16551657 break;
1658 case NVPTXISD::LoadV2:
16561659 case NVPTXISD::LDGV2:
16571660 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
16581661 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
16751678 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
16761679 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
16771680 break;
1681 case NVPTXISD::LoadV4:
16781682 case NVPTXISD::LDGV4:
16791683 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
16801684 NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
0 ; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
1
2 ; Check load from constant global variables. These loads should be
3 ; ld.global.nc (aka ldg).
4
5 @gv_float = external constant float
6 @gv_float2 = external constant <2 x float>
7 @gv_float4 = external constant <4 x float>
8
9 ; CHECK-LABEL: test_gv_float()
10 define float @test_gv_float() {
11 ; CHECK: ld.global.nc.f32
12 %v = load float, float* @gv_float
13 ret float %v
14 }
15
16 ; CHECK-LABEL: test_gv_float2()
17 define <2 x float> @test_gv_float2() {
18 ; CHECK: ld.global.nc.v2.f32
19 %v = load <2 x float>, <2 x float>* @gv_float2
20 ret <2 x float> %v
21 }
22
23 ; CHECK-LABEL: test_gv_float4()
24 define <4 x float> @test_gv_float4() {
25 ; CHECK: ld.global.nc.v4.f32
26 %v = load <4 x float>, <4 x float>* @gv_float4
27 ret <4 x float> %v
28 }