llvm.org GIT mirror llvm / 74eabdd
Introduce llvm.load.relative intrinsic. This intrinsic takes two arguments, ``%ptr`` and ``%offset``. It loads a 32-bit value from the address ``%ptr + %offset``, adds ``%ptr`` to that value and returns it. The constant folder specifically recognizes the form of this intrinsic and the constant initializers it may load from; if a loaded constant initializer is known to have the form ``i32 trunc(x - %ptr)``, the intrinsic call is folded to ``x``. LLVM provides that the calculation of such a constant initializer will not overflow at link time under the medium code model if ``x`` is an ``unnamed_addr`` function. However, it does not provide this guarantee for a constant initializer folded into a function body. This intrinsic can be used to avoid the possibility of overflows when loading from such a constant. Differential Revision: http://reviews.llvm.org/D18367 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267223 91177308-0d34-0410-b5e6-96231b3b80d8 Peter Collingbourne 3 years ago
15 changed file(s) with 311 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
1236012360 ``@llvm.experimental.guard`` cannot be invoked.
1236112361
1236212362
12363 '``llvm.load.relative``' Intrinsic
12364 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12365
12366 Syntax:
12367 """""""
12368
12369 ::
12370
12371 declare i8* @llvm.load.relative.iN(i8* %ptr, iN %offset) argmemonly nounwind readonly
12372
12373 Overview:
12374 """""""""
12375
12376 This intrinsic loads a 32-bit value from the address ``%ptr + %offset``,
12377 adds ``%ptr`` to that value and returns it. The constant folder specifically
12378 recognizes the form of this intrinsic and the constant initializers it may
12379 load from; if a loaded constant initializer is known to have the form
12380 ``i32 trunc(x - %ptr)``, the intrinsic call is folded to ``x``.
12381
12382 LLVM provides that the calculation of such a constant initializer will
12383 not overflow at link time under the medium code model if ``x`` is an
12384 ``unnamed_addr`` function. However, it does not provide this guarantee for
12385 a constant initializer folded into a function body. This intrinsic can be
12386 used to avoid the possibility of overflows when loading from such a constant.
12387
1236312388 Stack Map Intrinsics
1236412389 --------------------
1236512390
679679 ///
680680 ModulePass *createLowerEmuTLSPass(const TargetMachine *TM);
681681
682 /// This pass lowers the @llvm.load.relative intrinsic to instructions.
683 /// This is unsafe to do earlier because a pass may combine the constant
684 /// initializer into the load, which may result in an overflowing evaluation.
685 ModulePass *createPreISelIntrinsicLoweringPass();
686
682687 /// GlobalMerge - This pass merges internal (by default) globals into structs
683688 /// to enable reuse of a base pointer by indexed addressing modes.
684689 /// It can also be configured to focus on size optimizations only.
668668 def int_bitset_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
669669 [IntrNoMem]>;
670670
671 def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
672 [IntrReadMem, IntrArgMemOnly]>;
673
671674 //===----------------------------------------------------------------------===//
672675 // Target-specific intrinsics
673676 //===----------------------------------------------------------------------===//
245245 void initializePostRAHazardRecognizerPass(PassRegistry&);
246246 void initializePostRASchedulerPass(PassRegistry&);
247247 void initializePostMachineSchedulerPass(PassRegistry&);
248 void initializePreISelIntrinsicLoweringPass(PassRegistry&);
248249 void initializePrintFunctionPassWrapperPass(PassRegistry&);
249250 void initializePrintModulePassWrapperPass(PassRegistry&);
250251 void initializePrintBasicBlockPassPass(PassRegistry&);
899899 default:
900900 return Base::visitCallSite(CS);
901901
902 case Intrinsic::load_relative:
903 // This is normally lowered to 4 LLVM instructions.
904 Cost += 3 * InlineConstants::InstrCost;
905 return false;
906
902907 case Intrinsic::memset:
903908 case Intrinsic::memcpy:
904909 case Intrinsic::memmove:
38243824 }
38253825 }
38263826
3827 static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset,
3828 const DataLayout &DL) {
3829 GlobalValue *PtrSym;
3830 APInt PtrOffset;
3831 if (!IsConstantOffsetFromGlobal(Ptr, PtrSym, PtrOffset, DL))
3832 return nullptr;
3833
3834 Type *Int8PtrTy = Type::getInt8PtrTy(Ptr->getContext());
3835 Type *Int32Ty = Type::getInt32Ty(Ptr->getContext());
3836 Type *Int32PtrTy = Int32Ty->getPointerTo();
3837 Type *Int64Ty = Type::getInt64Ty(Ptr->getContext());
3838
3839 auto *OffsetConstInt = dyn_cast(Offset);
3840 if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64)
3841 return nullptr;
3842
3843 uint64_t OffsetInt = OffsetConstInt->getSExtValue();
3844 if (OffsetInt % 4 != 0)
3845 return nullptr;
3846
3847 Constant *C = ConstantExpr::getGetElementPtr(
3848 Int32Ty, ConstantExpr::getBitCast(Ptr, Int32PtrTy),
3849 ConstantInt::get(Int64Ty, OffsetInt / 4));
3850 Constant *Loaded = ConstantFoldLoadFromConstPtr(C, Int32Ty, DL);
3851 if (!Loaded)
3852 return nullptr;
3853
3854 auto *LoadedCE = dyn_cast(Loaded);
3855 if (!LoadedCE)
3856 return nullptr;
3857
3858 if (LoadedCE->getOpcode() == Instruction::Trunc) {
3859 LoadedCE = dyn_cast(LoadedCE->getOperand(0));
3860 if (!LoadedCE)
3861 return nullptr;
3862 }
3863
3864 if (LoadedCE->getOpcode() != Instruction::Sub)
3865 return nullptr;
3866
3867 auto *LoadedLHS = dyn_cast(LoadedCE->getOperand(0));
3868 if (!LoadedLHS || LoadedLHS->getOpcode() != Instruction::PtrToInt)
3869 return nullptr;
3870 auto *LoadedLHSPtr = LoadedLHS->getOperand(0);
3871
3872 Constant *LoadedRHS = LoadedCE->getOperand(1);
3873 GlobalValue *LoadedRHSSym;
3874 APInt LoadedRHSOffset;
3875 if (!IsConstantOffsetFromGlobal(LoadedRHS, LoadedRHSSym, LoadedRHSOffset,
3876 DL) ||
3877 PtrSym != LoadedRHSSym || PtrOffset != LoadedRHSOffset)
3878 return nullptr;
3879
3880 return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy);
3881 }
3882
38273883 template
38283884 static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
38293885 const Query &Q, unsigned MaxRecurse) {
38643920 if (match(RHS, m_Undef()))
38653921 return Constant::getNullValue(ReturnType);
38663922 }
3923
3924 if (IID == Intrinsic::load_relative && isa(LHS) &&
3925 isa(RHS))
3926 return SimplifyRelativeLoad(cast(LHS), cast(RHS),
3927 Q.DL);
38673928 }
38683929
38693930 // Perform idempotent optimizations
8686 PHIEliminationUtils.cpp
8787 PostRAHazardRecognizer.cpp
8888 PostRASchedulerList.cpp
89 PreISelIntrinsicLowering.cpp
8990 ProcessImplicitDefs.cpp
9091 PrologEpilogInserter.cpp
9192 PseudoSourceValue.cpp
6262 initializePostMachineSchedulerPass(Registry);
6363 initializePostRAHazardRecognizerPass(Registry);
6464 initializePostRASchedulerPass(Registry);
65 initializePreISelIntrinsicLoweringPass(Registry);
6566 initializeProcessImplicitDefsPass(Registry);
6667 initializeRegisterCoalescerPass(Registry);
6768 initializeShrinkWrapPass(Registry);
101101 if (TM->Options.EmulatedTLS)
102102 PM.add(createLowerEmuTLSPass(TM));
103103
104 PM.add(createPreISelIntrinsicLoweringPass());
105
104106 // Add internal analysis passes from the target machine.
105107 PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
106108
0 //===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements IR lowering for the llvm.load.relative intrinsic.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/CodeGen/Passes.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/IRBuilder.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/IR/Intrinsics.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/Pass.h"
20
21 using namespace llvm;
22
23 namespace {
24
25 bool lowerLoadRelative(Function &F) {
26 if (F.use_empty())
27 return false;
28
29 bool Changed = false;
30 Type *Int32Ty = Type::getInt32Ty(F.getContext());
31 Type *Int32PtrTy = Int32Ty->getPointerTo();
32 Type *Int8Ty = Type::getInt8Ty(F.getContext());
33
34 for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
35 auto CI = dyn_cast(I->getUser());
36 ++I;
37 if (!CI || CI->getCalledValue() != &F)
38 continue;
39
40 IRBuilder<> B(CI);
41 Value *OffsetPtr =
42 B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1));
43 Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy);
44 Value *OffsetI32 = B.CreateAlignedLoad(OffsetPtrI32, 4);
45
46 Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32);
47
48 CI->replaceAllUsesWith(ResultPtr);
49 CI->eraseFromParent();
50 Changed = true;
51 }
52
53 return Changed;
54 }
55
56 bool lowerIntrinsics(Module &M) {
57 bool Changed = false;
58 for (Function &F : M) {
59 if (F.getName().startswith("llvm.load.relative."))
60 Changed |= lowerLoadRelative(F);
61 }
62 return Changed;
63 }
64
65 class PreISelIntrinsicLowering : public ModulePass {
66 public:
67 static char ID;
68 PreISelIntrinsicLowering() : ModulePass(ID) {}
69
70 bool runOnModule(Module &M) {
71 return lowerIntrinsics(M);
72 }
73 };
74
75 char PreISelIntrinsicLowering::ID;
76
77 }
78
79 INITIALIZE_PASS(PreISelIntrinsicLowering, "pre-isel-intrinsic-lowering",
80 "Pre-ISel Intrinsic Lowering", false, false)
81
82 ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
83 return new PreISelIntrinsicLowering;
84 }
55 ; STOP-NEXT: Machine Function Analysis
66 ; STOP-NEXT: MIR Printing Pass
77
8 ; START: -machine-branch-prob -gc-lowering
8 ; START: -machine-branch-prob -pre-isel-intrinsic-lowering
99 ; START: FunctionPass Manager
1010 ; START-NEXT: Lower Garbage Collection Instructions
0 ; RUN: opt < %s -instsimplify -S | FileCheck %s
1
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
3 target triple = "i386-unknown-linux-gnu"
4
5 @a = external global i8
6
7 @c1 = constant [3 x i32] [i32 0, i32 0,
8 i32 sub (i32 ptrtoint (i8* @a to i32), i32 ptrtoint (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i32))
9 ]
10
11 ; CHECK: @f1
12 define i8* @f1() {
13 ; CHECK: ret i8* @a
14 %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i8*), i32 0)
15 ret i8* %l
16 }
17
18 declare i8* @llvm.load.relative.i32(i8*, i32)
0 ; RUN: opt < %s -instsimplify -S | FileCheck %s
1
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3 target triple = "x86_64-unknown-linux-gnu"
4
5 @a = external global i8
6 @b = external global i8
7
8 @c1 = constant i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* @c1 to i64)) to i32)
9 @c2 = constant [7 x i32] [i32 0, i32 0,
10 i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
11 i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
12 i32 trunc (i64 add (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
13 i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 1) to i32),
14 i32 trunc (i64 sub (i64 0, i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32)
15 ]
16
17 ; CHECK: @f1
18 define i8* @f1() {
19 ; CHECK: ret i8* @a
20 %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* @c1 to i8*), i32 0)
21 ret i8* %l
22 }
23
24 ; CHECK: @f2
25 define i8* @f2() {
26 ; CHECK: ret i8* @a
27 %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 0)
28 ret i8* %l
29 }
30
31 ; CHECK: @f3
32 define i8* @f3() {
33 ; CHECK: ret i8* @b
34 %l = call i8* @llvm.load.relative.i64(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i64 4)
35 ret i8* %l
36 }
37
38 ; CHECK: @f4
39 define i8* @f4() {
40 ; CHECK: ret i8* %
41 %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 1)
42 ret i8* %l
43 }
44
45 ; CHECK: @f5
46 define i8* @f5() {
47 ; CHECK: ret i8* %
48 %l = call i8* @llvm.load.relative.i32(i8* zeroinitializer, i32 0)
49 ret i8* %l
50 }
51
52 ; CHECK: @f6
53 define i8* @f6() {
54 ; CHECK: ret i8* %
55 %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 8)
56 ret i8* %l
57 }
58
59 ; CHECK: @f7
60 define i8* @f7() {
61 ; CHECK: ret i8* %
62 %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 12)
63 ret i8* %l
64 }
65
66 ; CHECK: @f8
67 define i8* @f8() {
68 ; CHECK: ret i8* %
69 %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 16)
70 ret i8* %l
71 }
72
73 declare i8* @llvm.load.relative.i32(i8*, i32)
74 declare i8* @llvm.load.relative.i64(i8*, i64)
0 ; RUN: opt -pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
1
2 ; CHECK: define i8* @foo32(i8* [[P:%.*]], i32 [[O:%.*]])
3 define i8* @foo32(i8* %p, i32 %o) {
4 ; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], i32 [[O]]
5 ; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32*
6 ; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4
7 ; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]]
8 ; CHECK: ret i8* [[R]]
9 %l = call i8* @llvm.load.relative.i32(i8* %p, i32 %o)
10 ret i8* %l
11 }
12
13 ; CHECK: define i8* @foo64(i8* [[P:%.*]], i64 [[O:%.*]])
14 define i8* @foo64(i8* %p, i64 %o) {
15 ; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], i64 [[O]]
16 ; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32*
17 ; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4
18 ; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]]
19 ; CHECK: ret i8* [[R]]
20 %l = call i8* @llvm.load.relative.i64(i8* %p, i64 %o)
21 ret i8* %l
22 }
23
24 declare i8* @llvm.load.relative.i32(i8*, i32)
25 declare i8* @llvm.load.relative.i64(i8*, i64)
352352 initializeDwarfEHPreparePass(Registry);
353353 initializeSafeStackPass(Registry);
354354 initializeSjLjEHPreparePass(Registry);
355 initializePreISelIntrinsicLoweringPass(Registry);
355356
356357 #ifdef LINK_POLLY_INTO_TOOLS
357358 polly::initializePollyPasses(Registry);