llvm.org GIT mirror llvm / 1d2683f
[NVPTX] Use addrspacecast instead of target-specific intrinsics in NVPTXGenericToNVVM. Summary: NVPTXGenericToNVVM was using target-specific intrinsics to do address space casts. Using the addrspacecast instruction is (a lot) simpler. But it also has the advantage of being understandable to other passes. In particular, InferAddrSpaces is able to understand these address space casts and remove them in most cases. Reviewers: tra Subscribers: jholewinski, sanjoy, hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D43914 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326389 91177308-0d34-0410-b5e6-96231b3b80d8 Justin Lebar 2 years ago
4 changed file(s) with 19 addition(s) and 65 deletion(s). Raw diff Collapse all Expand all
4444 void getAnalysisUsage(AnalysisUsage &AU) const override {}
4545
4646 private:
47 Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
48 IRBuilder<> &Builder);
4947 Value *remapConstant(Module *M, Function *F, Constant *C,
5048 IRBuilder<> &Builder);
5149 Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F,
155153 return true;
156154 }
157155
158 Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
159 GlobalVariable *GV,
160 IRBuilder<> &Builder) {
161 PointerType *GVType = GV->getType();
162 Value *CVTA = nullptr;
163
164 // See if the address space conversion requires the operand to be bitcast
165 // to i8 addrspace(n)* first.
166 EVT ExtendedGVType = EVT::getEVT(GV->getValueType(), true);
167 if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) {
168 // A bitcast to i8 addrspace(n)* on the operand is needed.
169 LLVMContext &Context = M->getContext();
170 unsigned int AddrSpace = GVType->getAddressSpace();
171 Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace);
172 CVTA = Builder.CreateBitCast(GV, DestTy, "cvta");
173 // Insert the address space conversion.
174 Type *ResultType =
175 PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC);
176 Function *CVTAFunction = Intrinsic::getDeclaration(
177 M, Intrinsic::nvvm_ptr_global_to_gen, {ResultType, DestTy});
178 CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta");
179 // Another bitcast from i8 * to <the element type of GVType> * is
180 // required.
181 DestTy =
182 PointerType::get(GV->getValueType(), llvm::ADDRESS_SPACE_GENERIC);
183 CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta");
184 } else {
185 // A simple CVTA is enough.
186 SmallVector<Type *, 2> ParamTypes;
187 ParamTypes.push_back(PointerType::get(GV->getValueType(),
188 llvm::ADDRESS_SPACE_GENERIC));
189 ParamTypes.push_back(GVType);
190 Function *CVTAFunction = Intrinsic::getDeclaration(
191 M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
192 CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta");
193 }
194
195 return CVTA;
196 }
197
198156 Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
199157 IRBuilder<> &Builder) {
200158 // If the constant C has been converted already in the given function F, just
206164
207165 Value *NewValue = C;
208166 if (isa<GlobalVariable>(C)) {
209 // If the constant C is a global variable and is found in GVMap, generate a
210 // set set of instructions that convert the clone of C with the global
211 // address space specifier to a generic pointer.
212 // The constant C cannot be used here, as it will be erased from the
213 // module eventually. And the clone of C with the global address space
214 // specifier cannot be used here either, as it will affect the types of
215 // other instructions in the function. Hence, this address space conversion
216 // is required.
167 // If the constant C is a global variable and is found in GVMap, substitute
168 //
169 // addrspacecast GVMap[C] to addrspace(0)
170 //
171 // for our use of C.
217172 GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C));
218173 if (I != GVMap.end()) {
219 NewValue = getOrInsertCVTA(M, F, I->second, Builder);
174 GlobalVariable *GV = I->second;
175 NewValue = Builder.CreateAddrSpaceCast(
176 GV,
177 PointerType::get(GV->getValueType(), llvm::ADDRESS_SPACE_GENERIC));
220178 }
221179 } else if (isa<ConstantAggregate>(C)) {
222180 // If any element in the constant vector or aggregate C is or uses a global
44
55 @array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
66 @scalar = internal addrspace(3) global float 0.000000e+00, align 4
7 @generic_scalar = internal global float 0.000000e+00, align 4
8
9 define float @ld_from_shared() {
10 %1 = addrspacecast float* @generic_scalar to float addrspace(3)*
11 %2 = load float, float addrspace(3)* %1
12 ret float %2
13 }
147
158 ; Verifies nvptx-favor-non-generic correctly optimizes generic address space
169 ; usage to non-generic address space usage for the patterns we claim to handle:
1515 ;CHECK-LABEL: @func()
1616 ;CHECK-SAME: !dbg [[FUNCNODE:![0-9]+]]
1717 entry:
18 ; References to the variables must be converted back to generic address space via llvm intrinsic call
19 ; CHECK-DAG: call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8({{.*}} addrspace(1)* @.str
18 ; References to the variables must be converted back to generic address space.
19 ; CHECK-DAG: addrspacecast ([4 x i8] addrspace(1)* @.str to [4 x i8]*)
2020 %0 = load i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), align 1
2121 call void @extfunc(i8 signext %0)
22 ; CHECK-DAG: call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)* @static_var
22 ; CHECK-DAG: addrspacecast (i8 addrspace(1)* @static_var to i8*)
2323 %1 = load i8, i8* @static_var, align 1
2424 call void @extfunc(i8 signext %1)
2525 ret void
66
77 ; CHECK: .global .align 4 .u32 myglobal = 42;
88 @myglobal = internal global i32 42, align 4
9 ; CHECK: .global .align 4 .u32 myconst = 42;
10 @myconst = internal constant i32 42, align 4
9 ; CHECK: .global .align 4 .u32 myconst = 420;
10 @myconst = internal constant i32 420, align 4
1111
1212
1313 define void @foo(i32* %a, i32* %b) {
14 ; CHECK: cvta.global.u32
14 ; Expect one load -- @myconst isn't loaded from, because we know its value
15 ; statically.
16 ; CHECK: ld.global.u32
17 ; CHECK: st.global.u32
18 ; CHECK: st.global.u32
1519 %ld1 = load i32, i32* @myglobal
16 ; CHECK: cvta.global.u32
1720 %ld2 = load i32, i32* @myconst
1821 store i32 %ld1, i32* %a
1922 store i32 %ld2, i32* %b