llvm.org GIT mirror llvm / 0c4f69f
Remove the ScalarReplAggregates pass

Nearly all the changes to this pass have been done while maintaining and updating other parts of LLVM. LLVM has had another pass, SROA, which has superseded ScalarReplAggregates for quite some time.

Differential Revision: http://reviews.llvm.org/D21316

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272737 91177308-0d34-0410-b5e6-96231b3b80d8

David Majnemer, 3 years ago
83 changed file(s) with 30 addition(s) and 5114 deletion(s).
126126 : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
127127 = "llvm_add_sccp"
128128
129 (** See the [llvm::createScalarReplAggregatesPass] function. *)
129 (** See the [llvm::createSROAPass] function. *)
130130 external add_scalar_repl_aggregation
131131 : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
132132 = "llvm_add_scalar_repl_aggregates"
133133
134 (** See the [llvm::createScalarReplAggregatesPassSSA] function. *)
134 (** See the [llvm::createSROAPass] function. *)
135135 external add_scalar_repl_aggregation_ssa
136136 : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
137137 = "llvm_add_scalar_repl_aggregates_ssa"
138138
139 (** See the [llvm::createScalarReplAggregatesWithThreshold] function. *)
139 (** See the [llvm::createSROAPass] function. *)
140140 external add_scalar_repl_aggregation_with_threshold
141141 : int -> [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
142142 = "llvm_add_scalar_repl_aggregates_with_threshold"
397397 MemoryDependencyAnalysis (which is also used by other passes like GVN).
398398
399399 * Folding a load: Any atomic load from a constant global can be constant-folded,
400 because it cannot be observed. Similar reasoning allows scalarrepl with
400 because it cannot be observed. Similar reasoning allows sroa with
401401 atomic loads and stores.
402402
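The folding rule above can be expressed directly against the C++ API. The helper below is a minimal sketch for this note, not an in-tree function; the name isFoldableAtomicLoad is illustrative:

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Instructions.h"

    // Sketch only: a non-volatile load (atomic or not) from a constant global
    // with a known initializer can be folded like a plain load, because no
    // other thread can observe the difference.
    static bool isFoldableAtomicLoad(const llvm::LoadInst &LI) {
      if (LI.isVolatile())
        return false;
      auto *GV = llvm::dyn_cast<llvm::GlobalVariable>(
          LI.getPointerOperand()->stripPointerCasts());
      return GV && GV->isConstant() && GV->hasDefinitiveInitializer();
    }
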
403403 Atomics and Codegen
946946 the entry block is split into two, such that all introduced ``alloca``
947947 instructions (and nothing else) are in the entry block.
948948
949 ``-scalarrepl``: Scalar Replacement of Aggregates (DT)
949 ``-sroa``: Scalar Replacement of Aggregates
950950 ------------------------------------------------------
951951
952952 The well-known scalar replacement of aggregates transformation. This transform
953953 breaks up ``alloca`` instructions of aggregate type (structure or array) into
954954 individual ``alloca`` instructions for each member if possible. Then, if
955955 possible, it transforms the individual ``alloca`` instructions into nice clean
956956 scalar SSA form.
957
958 This combines a simple scalar replacement of aggregates algorithm with the
959 :ref:`mem2reg ` algorithm because they often interact,
960 especially for C++ programs. As such, iterating between ``scalarrepl``, then
961 :ref:`mem2reg ` until we run out of things to promote works
962 well.
963957
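In terms of the C++ API, this pass is created with llvm::createSROAPass(); the old createScalarReplAggregatesPass entry points are removed by this commit. A minimal sketch of scheduling it with the legacy pass manager, assuming an existing Module:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"

    // Sketch only: run SROA over every defined function in M.
    static void runSROAOnModule(llvm::Module &M) {
      llvm::legacy::FunctionPassManager FPM(&M);
      FPM.add(llvm::createSROAPass());
      FPM.doInitialization();
      for (llvm::Function &F : M)
        if (!F.isDeclaration())
          FPM.run(F);
      FPM.doFinalization();
    }
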
964958 .. _passes-sccp:
965959
223223 class <../LangRef.html#first-class-types>`_ values (such as pointers,
224224 scalars and vectors), and only if the array size of the allocation is
225225 1 (or missing in the .ll file). mem2reg is not capable of promoting
226 structs or arrays to registers. Note that the "scalarrepl" pass is
226 structs or arrays to registers. Note that the "sroa" pass is
227227 more powerful and can promote structs, "unions", and arrays in many
228228 cases.
229229
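As a hedged illustration (the helper below is made up for this note, not tutorial code), this is the kind of aggregate alloca a front end might emit with IRBuilder; mem2reg leaves the struct alloca alone, while sroa can split it into per-field allocas and then promote those:

    #include "llvm/IR/IRBuilder.h"

    // Sketch only: allocate a { i32, i32 } pair on the stack and store to each
    // field through struct GEPs.  mem2reg skips the aggregate alloca; sroa
    // rewrites it into two scalar allocas that can then be promoted to SSA.
    static void emitPairAlloca(llvm::IRBuilder<> &Builder) {
      llvm::LLVMContext &Ctx = Builder.getContext();
      llvm::StructType *PairTy = llvm::StructType::get(
          Ctx, {Builder.getInt32Ty(), Builder.getInt32Ty()});
      llvm::AllocaInst *Pair = Builder.CreateAlloca(PairTy, nullptr, "pair");
      llvm::Value *Fst = Builder.CreateStructGEP(PairTy, Pair, 0, "fst");
      llvm::Value *Snd = Builder.CreateStructGEP(PairTy, Pair, 1, "snd");
      Builder.CreateStore(Builder.getInt32(1), Fst);
      Builder.CreateStore(Builder.getInt32(2), Snd);
    }
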
223223 class <../LangRef.html#first-class-types>`_ values (such as pointers,
224224 scalars and vectors), and only if the array size of the allocation is
225225 1 (or missing in the .ll file). mem2reg is not capable of promoting
226 structs or arrays to registers. Note that the "scalarrepl" pass is
226 structs or arrays to registers. Note that the "sroa" pass is
227227 more powerful and can promote structs, "unions", and arrays in many
228228 cases.
229229
285285 void initializeSCEVAAWrapperPassPass(PassRegistry&);
286286 void initializeSLPVectorizerPass(PassRegistry&);
287287 void initializeSROALegacyPassPass(PassRegistry&);
288 void initializeSROA_DTPass(PassRegistry&);
289 void initializeSROA_SSAUpPass(PassRegistry&);
290288 void initializeSafeStackPass(PassRegistry&);
291289 void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&);
292290 void initializeSanitizerCoverageModulePass(PassRegistry&);
144144 (void) llvm::createRegionViewerPass();
145145 (void) llvm::createSCCPPass();
146146 (void) llvm::createSafeStackPass();
147 (void) llvm::createScalarReplAggregatesPass();
147 (void) llvm::createSROAPass();
148148 (void) llvm::createSingleLoopExtractorPass();
149149 (void) llvm::createStripSymbolsPass();
150150 (void) llvm::createStripNonDebugSymbolsPass();
105105
106106 //===----------------------------------------------------------------------===//
107107 //
108 // ScalarReplAggregates - Break up alloca's of aggregates into multiple allocas
109 // if possible.
110 //
111 FunctionPass *createScalarReplAggregatesPass(signed Threshold = -1,
112 bool UseDomTree = true,
113 signed StructMemberThreshold = -1,
114 signed ArrayElementThreshold = -1,
115 signed ScalarLoadThreshold = -1);
116
117 //===----------------------------------------------------------------------===//
118 //
119108 // InductiveRangeCheckElimination - Transform loops to elide range checks on
120109 // linear functions of the induction variable.
121110 //
103103 /** See llvm::createSCCPPass function. */
104104 void LLVMAddSCCPPass(LLVMPassManagerRef PM);
105105
106 /** See llvm::createScalarReplAggregatesPass function. */
106 /** See llvm::createSROAPass function. */
107107 void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM);
108108
109 /** See llvm::createScalarReplAggregatesPass function. */
109 /** See llvm::createSROAPass function. */
110110 void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM);
111111
112 /** See llvm::createScalarReplAggregatesPass function. */
112 /** See llvm::createSROAPass function. */
113113 void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
114114 int Threshold);
115115
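All three wrappers now create the same SROA pass, and the threshold argument is ignored. A minimal sketch of a C API client, assuming M is an LLVMModuleRef created elsewhere:

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/Scalar.h"

    /* Sketch only: every call below now schedules SROA; the threshold value is
       accepted for API compatibility but has no effect. */
    static void runScalarReplPasses(LLVMModuleRef M) {
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMAddScalarReplAggregatesPass(PM);
      LLVMAddScalarReplAggregatesPassSSA(PM);
      LLVMAddScalarReplAggregatesPassWithThreshold(PM, 128);
      LLVMRunPassManager(PM, M);
      LLVMDisposePassManager(PM);
    }
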
118118 initializeArgPromotionPass(R);
119119 initializeJumpThreadingPass(R);
120120 initializeSROALegacyPassPass(R);
121 initializeSROA_DTPass(R);
122 initializeSROA_SSAUpPass(R);
123121 initializePostOrderFunctionAttrsLegacyPassPass(R);
124122 initializeReversePostOrderFunctionAttrsLegacyPassPass(R);
125123 initializeGlobalsAAWrapperPassPass(R);
20802080 }
20812081
20822082 We currently compile this to:
2083 $ clang t.c -S -o - -O0 -emit-llvm | opt -scalarrepl -S
2083 $ clang t.c -S -o - -O0 -emit-llvm | opt -sroa -S
20842084
20852085
20862086 %struct.x = type { i8, [4 x i32] }
169169 of the optimizations which are possible if we know the address of a va_list
170170 in the current function is never taken:
171171 1. We shouldn't spill the XMM registers because we only call va_arg with "int".
172 2. It would be nice if we could scalarrepl the va_list.
172 2. It would be nice if we could sroa the va_list.
173173 3. Probably overkill, but it'd be cool if we could peel off the first five
174174 iterations of the loop.
175175
306306 }
307307
308308 // Safe to transform, don't even bother trying to "promote" it.
309 // Passing the elements as a scalar will allow scalarrepl to hack on
309 // Passing the elements as a scalar will allow sroa to hack on
310310 // the new alloca we introduce.
311311 if (AllSimple) {
312312 ByValArgsToTransform.insert(PtrArg);
6060 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
6161 cl::desc("Run cleanup optimization passes after vectorization."));
6262
63 static cl::opt<bool> UseNewSROA("use-new-sroa",
64 cl::init(true), cl::Hidden,
65 cl::desc("Enable the new, experimental SROA pass"));
66
6763 static cl::opt<bool>
6864 RunLoopRerolling("reroll-loops", cl::Hidden,
6965 cl::desc("Run the loop rerolling pass"));
200196 addInitialAliasAnalysisPasses(FPM);
201197
202198 FPM.add(createCFGSimplificationPass());
203 if (UseNewSROA)
204 FPM.add(createSROAPass());
205 else
206 FPM.add(createScalarReplAggregatesPass());
199 FPM.add(createSROAPass());
207200 FPM.add(createEarlyCSEPass());
208201 FPM.add(createLowerExpectIntrinsicPass());
209202 }
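With the use-new-sroa flag gone, the standard pipelines always contain SROA. A hedged sketch of how a front end might build the per-function pipeline (the function name is illustrative):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    // Sketch only: populate the standard function pipeline, which now starts
    // with SROA unconditionally instead of choosing between SROA and
    // ScalarReplAggregates based on a flag.
    static void buildFunctionPipeline(llvm::Module &M) {
      llvm::PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      llvm::legacy::FunctionPassManager FPM(&M);
      PMB.populateFunctionPassManager(FPM);
      // FPM now contains CFGSimplification, SROA, EarlyCSE, LowerExpectIntrinsic.
    }
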
224217 legacy::PassManagerBase &MPM) {
225218 // Start of function pass.
226219 // Break up aggregate allocas, using SSAUpdater.
227 if (UseNewSROA)
228 MPM.add(createSROAPass());
229 else
230 MPM.add(createScalarReplAggregatesPass(-1, false));
220 MPM.add(createSROAPass());
231221 MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
232222 // Speculative execution if the target has divergent branches; otherwise nop.
233223 MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
653643 PM.add(createJumpThreadingPass());
654644
655645 // Break up allocas
656 if (UseNewSROA)
657 PM.add(createSROAPass());
658 else
659 PM.add(createScalarReplAggregatesPass());
646 PM.add(createSROAPass());
660647
661648 // Run a few AA driven optimizations here and now, to cleanup the code.
662649 PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
4444 SCCP.cpp
4545 SROA.cpp
4646 Scalar.cpp
47 ScalarReplAggregates.cpp
4847 Scalarizer.cpp
4948 SeparateConstOffsetFromGEP.cpp
5049 SimplifyCFGPass.cpp
7373 initializeSCCPLegacyPassPass(Registry);
7474 initializeIPSCCPLegacyPassPass(Registry);
7575 initializeSROALegacyPassPass(Registry);
76 initializeSROA_DTPass(Registry);
77 initializeSROA_SSAUpPass(Registry);
7876 initializeCFGSimplifyPassPass(Registry);
7977 initializeStructurizeCFGPass(Registry);
8078 initializeSinkingLegacyPassPass(Registry);
197195 }
198196
199197 void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
200 unwrap(PM)->add(createScalarReplAggregatesPass());
198 unwrap(PM)->add(createSROAPass());
201199 }
202200
203201 void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
204 unwrap(PM)->add(createScalarReplAggregatesPass(-1, false));
202 unwrap(PM)->add(createSROAPass());
205203 }
206204
207205 void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
208206 int Threshold) {
209 unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
207 unwrap(PM)->add(createSROAPass());
210208 }
211209
212210 void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
lib/Transforms/Scalar/ScalarReplAggregates.cpp (+0, -2618)
None //===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation implements the well known scalar replacement of
10 // aggregates transformation. This xform breaks up alloca instructions of
11 // aggregate type (structure or array) into individual alloca instructions for
12 // each member (if possible). Then, if possible, it transforms the individual
13 // alloca instructions into nice clean scalar SSA form.
14 //
15 // This combines a simple SRoA algorithm with the Mem2Reg algorithm because they
16 // often interact, especially for C++ programs. As such, iterating between
17 // SRoA, then Mem2Reg until we run out of things to promote works well.
18 //
19 //===----------------------------------------------------------------------===//
20
21 #include "llvm/Transforms/Scalar.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/AssumptionCache.h"
26 #include "llvm/Analysis/Loads.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DIBuilder.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/DebugInfo.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/GetElementPtrTypeIterator.h"
37 #include "llvm/IR/GlobalVariable.h"
38 #include "llvm/IR/IRBuilder.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/LLVMContext.h"
42 #include "llvm/IR/Module.h"
43 #include "llvm/IR/Operator.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Debug.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/MathExtras.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include "llvm/Transforms/Utils/Local.h"
50 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
51 #include "llvm/Transforms/Utils/SSAUpdater.h"
52 using namespace llvm;
53
54 #define DEBUG_TYPE "scalarrepl"
55
56 STATISTIC(NumReplaced, "Number of allocas broken up");
57 STATISTIC(NumPromoted, "Number of allocas promoted");
58 STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
59 STATISTIC(NumConverted, "Number of aggregates converted to scalar");
60
61 namespace {
62 #define SROA SROA_
63 struct SROA : public FunctionPass {
64 SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
65 : FunctionPass(ID), HasDomTree(hasDT) {
66 if (T == -1)
67 SRThreshold = 128;
68 else
69 SRThreshold = T;
70 if (ST == -1)
71 StructMemberThreshold = 32;
72 else
73 StructMemberThreshold = ST;
74 if (AT == -1)
75 ArrayElementThreshold = 8;
76 else
77 ArrayElementThreshold = AT;
78 if (SLT == -1)
79 // Do not limit the scalar integer load size if no threshold is given.
80 ScalarLoadThreshold = -1;
81 else
82 ScalarLoadThreshold = SLT;
83 }
84
85 bool runOnFunction(Function &F) override;
86
87 bool performScalarRepl(Function &F);
88 bool performPromotion(Function &F);
89
90 private:
91 bool HasDomTree;
92
93 /// DeadInsts - Keep track of instructions we have made dead, so that
94 /// we can remove them after we are done working.
95 SmallVector<Value *, 32> DeadInsts;
96
97 /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
98 /// information about the uses. All these fields are initialized to false
99 /// and set to true when something is learned.
100 struct AllocaInfo {
101 /// The alloca to promote.
102 AllocaInst *AI;
103
104 /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
105 /// looping and avoid redundant work.
106 SmallPtrSet<PHINode *, 8> CheckedPHIs;
107
108 /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
109 bool isUnsafe : 1;
110
111 /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
112 bool isMemCpySrc : 1;
113
114 /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
115 bool isMemCpyDst : 1;
116
117 /// hasSubelementAccess - This is true if a subelement of the alloca is
118 /// ever accessed, or false if the alloca is only accessed with mem
119 /// intrinsics or load/store that only access the entire alloca at once.
120 bool hasSubelementAccess : 1;
121
122 /// hasALoadOrStore - This is true if there are any loads or stores to it.
123 /// The alloca may just be accessed with memcpy, for example, which would
124 /// not set this.
125 bool hasALoadOrStore : 1;
126
127 explicit AllocaInfo(AllocaInst *ai)
128 : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
129 hasSubelementAccess(false), hasALoadOrStore(false) {}
130 };
131
132 /// SRThreshold - The maximum alloca size to considered for SROA.
133 unsigned SRThreshold;
134
135 /// StructMemberThreshold - The maximum number of members a struct can
136 /// contain to be considered for SROA.
137 unsigned StructMemberThreshold;
138
139 /// ArrayElementThreshold - The maximum number of elements an array can
140 /// have to be considered for SROA.
141 unsigned ArrayElementThreshold;
142
143 /// ScalarLoadThreshold - The maximum size in bits of scalars to load when
144 /// converting to scalar
145 unsigned ScalarLoadThreshold;
146
147 void MarkUnsafe(AllocaInfo &I, Instruction *User) {
148 I.isUnsafe = true;
149 DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
150 }
151
152 bool isSafeAllocaToScalarRepl(AllocaInst *AI);
153
154 void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
155 void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
156 AllocaInfo &Info);
157 void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
158 void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
159 Type *MemOpType, bool isStore, AllocaInfo &Info,
160 Instruction *TheAccess, bool AllowWholeAccess);
161 bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
162 const DataLayout &DL);
163 uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
164 const DataLayout &DL);
165
166 void DoScalarReplacement(AllocaInst *AI,
167 std::vector<AllocaInst *> &WorkList);
168 void DeleteDeadInstructions();
169
170 void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
171 SmallVectorImpl<AllocaInst *> &NewElts);
172 void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
173 SmallVectorImpl<AllocaInst *> &NewElts);
174 void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
175 SmallVectorImpl<AllocaInst *> &NewElts);
176 void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
177 uint64_t Offset,
178 SmallVectorImpl<AllocaInst *> &NewElts);
179 void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
180 AllocaInst *AI,
181 SmallVectorImpl<AllocaInst *> &NewElts);
182 void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
183 SmallVectorImpl<AllocaInst *> &NewElts);
184 void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
185 SmallVectorImpl<AllocaInst *> &NewElts);
186 bool ShouldAttemptScalarRepl(AllocaInst *AI);
187 };
188
189 // SROA_DT - SROA that uses DominatorTree.
190 struct SROA_DT : public SROA {
191 static char ID;
192 public:
193 SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
194 SROA(T, true, ID, ST, AT, SLT) {
195 initializeSROA_DTPass(*PassRegistry::getPassRegistry());
196 }
197
198 // getAnalysisUsage - This pass does not require any passes, but we know it
199 // will not alter the CFG, so say so.
200 void getAnalysisUsage(AnalysisUsage &AU) const override {
201 AU.addRequired<AssumptionCacheTracker>();
202 AU.addRequired<DominatorTreeWrapperPass>();
203 AU.setPreservesCFG();
204 }
205 };
206
207 // SROA_SSAUp - SROA that uses SSAUpdater.
208 struct SROA_SSAUp : public SROA {
209 static char ID;
210 public:
211 SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
212 SROA(T, false, ID, ST, AT, SLT) {
213 initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
214 }
215
216 // getAnalysisUsage - This pass does not require any passes, but we know it
217 // will not alter the CFG, so say so.
218 void getAnalysisUsage(AnalysisUsage &AU) const override {
219 AU.addRequired<AssumptionCacheTracker>();
220 AU.setPreservesCFG();
221 }
222 };
223
224 }
225
226 char SROA_DT::ID = 0;
227 char SROA_SSAUp::ID = 0;
228
229 INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
230 "Scalar Replacement of Aggregates (DT)", false, false)
231 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
232 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
233 INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
234 "Scalar Replacement of Aggregates (DT)", false, false)
235
236 INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
237 "Scalar Replacement of Aggregates (SSAUp)", false, false)
238 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
239 INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
240 "Scalar Replacement of Aggregates (SSAUp)", false, false)
241
242 // Public interface to the ScalarReplAggregates pass
243 FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
244 bool UseDomTree,
245 int StructMemberThreshold,
246 int ArrayElementThreshold,
247 int ScalarLoadThreshold) {
248 if (UseDomTree)
249 return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold,
250 ScalarLoadThreshold);
251 return new SROA_SSAUp(Threshold, StructMemberThreshold,
252 ArrayElementThreshold, ScalarLoadThreshold);
253 }
254
255
256 //===----------------------------------------------------------------------===//
257 // Convert To Scalar Optimization.
258 //===----------------------------------------------------------------------===//
259
260 namespace {
261 /// ConvertToScalarInfo - This class implements the "Convert To Scalar"
262 /// optimization, which scans the uses of an alloca and determines if it can
263 /// rewrite it in terms of a single new alloca that can be mem2reg'd.
264 class ConvertToScalarInfo {
265 /// AllocaSize - The size of the alloca being considered in bytes.
266 unsigned AllocaSize;
267 const DataLayout &DL;
268 unsigned ScalarLoadThreshold;
269
270 /// IsNotTrivial - This is set to true if there is some access to the object
271 /// which means that mem2reg can't promote it.
272 bool IsNotTrivial;
273
274 /// ScalarKind - Tracks the kind of alloca being considered for promotion,
275 /// computed based on the uses of the alloca rather than the LLVM type system.
276 enum {
277 Unknown,
278
279 // Accesses via GEPs that are consistent with element access of a vector
280 // type. This will not be converted into a vector unless there is a later
281 // access using an actual vector type.
282 ImplicitVector,
283
284 // Accesses via vector operations and GEPs that are consistent with the
285 // layout of a vector type.
286 Vector,
287
288 // An integer bag-of-bits with bitwise operations for insertion and
289 // extraction. Any combination of types can be converted into this kind
290 // of scalar.
291 Integer
292 } ScalarKind;
293
294 /// VectorTy - This tracks the type that we should promote the vector to if
295 /// it is possible to turn it into a vector. This starts out null, and if it
296 /// isn't possible to turn into a vector type, it gets set to VoidTy.
297 VectorType *VectorTy;
298
299 /// HadNonMemTransferAccess - True if there is at least one access to the
300 /// alloca that is not a MemTransferInst. We don't want to turn structs into
301 /// large integers unless there is some potential for optimization.
302 bool HadNonMemTransferAccess;
303
304 /// HadDynamicAccess - True if some element of this alloca was dynamic.
305 /// We don't yet have support for turning a dynamic access into a large
306 /// integer.
307 bool HadDynamicAccess;
308
309 public:
310 explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
311 unsigned SLT)
312 : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false),
313 ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false),
314 HadDynamicAccess(false) { }
315
316 AllocaInst *TryConvert(AllocaInst *AI);
317
318 private:
319 bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
320 void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
321 bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
322 void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
323 Value *NonConstantIdx);
324
325 Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
326 uint64_t Offset, Value* NonConstantIdx,
327 IRBuilder<> &Builder);
328 Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
329 uint64_t Offset, Value* NonConstantIdx,
330 IRBuilder<> &Builder);
331 };
332 } // end anonymous namespace.
333
334
335 /// TryConvert - Analyze the specified alloca, and if it is safe to do so,
336 /// rewrite it to be a new alloca which is mem2reg'able. This returns the new
337 /// alloca if possible or null if not.
338 AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
339 // If we can't convert this scalar, or if mem2reg can trivially do it, bail
340 // out.
341 if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial)
342 return nullptr;
343
344 // If an alloca has only memset / memcpy uses, it may still have an Unknown
345 // ScalarKind. Treat it as an Integer below.
346 if (ScalarKind == Unknown)
347 ScalarKind = Integer;
348
349 if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
350 ScalarKind = Integer;
351
352 // If we were able to find a vector type that can handle this with
353 // insert/extract elements, and if there was at least one use that had
354 // a vector type, promote this to a vector. We don't want to promote
355 // random stuff that doesn't use vectors (e.g. <9 x double>) because then
356 // we just get a lot of insert/extracts. If at least one vector is
357 // involved, then we probably really do have a union of vector/array.
358 Type *NewTy;
359 if (ScalarKind == Vector) {
360 assert(VectorTy && "Missing type for vector scalar.");
361 DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
362 << *VectorTy << '\n');
363 NewTy = VectorTy; // Use the vector type.
364 } else {
365 unsigned BitWidth = AllocaSize * 8;
366
367 // Do not convert to scalar integer if the alloca size exceeds the
368 // scalar load threshold.
369 if (BitWidth > ScalarLoadThreshold)
370 return nullptr;
371
372 if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
373 !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
374 return nullptr;
375 // Dynamic accesses on integers aren't yet supported. They need us to shift
376 // by a dynamic amount which could be difficult to work out as we might not
377 // know whether to use a left or right shift.
378 if (ScalarKind == Integer && HadDynamicAccess)
379 return nullptr;
380
381 DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
382 // Create and insert the integer alloca.
383 NewTy = IntegerType::get(AI->getContext(), BitWidth);
384 }
385 AllocaInst *NewAI =
386 new AllocaInst(NewTy, nullptr, "", &AI->getParent()->front());
387 ConvertUsesToScalar(AI, NewAI, 0, nullptr);
388 return NewAI;
389 }
390
391 /// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
392 /// (VectorTy) so far at the offset specified by Offset (which is specified in
393 /// bytes).
394 ///
395 /// There are two cases we handle here:
396 /// 1) A union of vector types of the same size and potentially its elements.
397 /// Here we turn element accesses into insert/extract element operations.
398 /// This promotes a <4 x float> with a store of float to the third element
399 /// into a <4 x float> that uses insert element.
400 /// 2) A fully general blob of memory, which we turn into some (potentially
401 /// large) integer type with extract and insert operations where the loads
402 /// and stores would mutate the memory. We mark this by setting VectorTy
403 /// to VoidTy.
404 void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
405 uint64_t Offset) {
406 // If we already decided to turn this into a blob of integer memory, there is
407 // nothing to be done.
408 if (ScalarKind == Integer)
409 return;
410
411 // If this could be contributing to a vector, analyze it.
412
413 // If the In type is a vector that is the same size as the alloca, see if it
414 // matches the existing VecTy.
415 if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
416 if (MergeInVectorType(VInTy, Offset))
417 return;
418 } else if (In->isFloatTy() || In->isDoubleTy() ||
419 (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
420 isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
421 // Full width accesses can be ignored, because they can always be turned
422 // into bitcasts.
423 unsigned EltSize = In->getPrimitiveSizeInBits()/8;
424 if (EltSize == AllocaSize)
425 return;
426
427 // If we're accessing something that could be an element of a vector, see
428 // if the implied vector agrees with what we already have and if Offset is
429 // compatible with it.
430 if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
431 (!VectorTy || EltSize == VectorTy->getElementType()
432 ->getPrimitiveSizeInBits()/8)) {
433 if (!VectorTy) {
434 ScalarKind = ImplicitVector;
435 VectorTy = VectorType::get(In, AllocaSize/EltSize);
436 }
437 return;
438 }
439 }
440
441 // Otherwise, we have a case that we can't handle with an optimized vector
442 // form. We can still turn this into a large integer.
443 ScalarKind = Integer;
444 }
445
446 /// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
447 /// returning true if the type was successfully merged and false otherwise.
448 bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
449 uint64_t Offset) {
450 if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
451 // If we're storing/loading a vector of the right size, allow it as a
452 // vector. If this the first vector we see, remember the type so that
453 // we know the element size. If this is a subsequent access, ignore it
454 // even if it is a differing type but the same size. Worst case we can
455 // bitcast the resultant vectors.
456 if (!VectorTy)
457 VectorTy = VInTy;
458 ScalarKind = Vector;
459 return true;
460 }
461
462 return false;
463 }
464
465 /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
466 /// its accesses to a single vector type, return true and set VecTy to
467 /// the new type. If we could convert the alloca into a single promotable
468 /// integer, return true but set VecTy to VoidTy. Further, if the use is not a
469 /// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset
470 /// is the current offset from the base of the alloca being analyzed.
471 ///
472 /// If we see at least one access to the value that is as a vector type, set the
473 /// SawVec flag.
474 bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
475 Value* NonConstantIdx) {
476 for (User *U : V->users()) {
477 Instruction *UI = cast<Instruction>(U);
478
479 if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
480 // Don't break volatile loads.
481 if (!LI->isSimple())
482 return false;
483 // Don't touch MMX operations.
484 if (LI->getType()->isX86_MMXTy())
485 return false;
486 HadNonMemTransferAccess = true;
487 MergeInTypeForLoadOrStore(LI->getType(), Offset);
488 continue;
489 }
490
491 if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
492 // Storing the pointer, not into the value?
493 if (SI->getOperand(0) == V || !SI->isSimple()) return false;
494 // Don't touch MMX operations.
495 if (SI->getOperand(0)->getType()->isX86_MMXTy())
496 return false;
497 HadNonMemTransferAccess = true;
498 MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
499 continue;
500 }
501
502 if (BitCastInst *BCI = dyn_cast<BitCastInst>(UI)) {
503 if (!onlyUsedByLifetimeMarkers(BCI))
504 IsNotTrivial = true; // Can't be mem2reg'd.
505 if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
506 return false;
507 continue;
508 }
509
510 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UI)) {
511 // If this is a GEP with a variable indices, we can't handle it.
512 // Compute the offset that this GEP adds to the pointer.
513 SmallVector Indices(GEP->op_begin()+1, GEP->op_end());
514 Value *GEPNonConstantIdx = nullptr;
515 if (!GEP->hasAllConstantIndices()) {
516 if (!isa<VectorType>(GEP->getSourceElementType()))
517 return false;
518 if (NonConstantIdx)
519 return false;
520 GEPNonConstantIdx = Indices.pop_back_val();
521 if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
522 return false;
523 HadDynamicAccess = true;
524 } else
525 GEPNonConstantIdx = NonConstantIdx;
526 uint64_t GEPOffset = DL.getIndexedOffsetInType(GEP->getSourceElementType(),
527 Indices);
528 // See if all uses can be converted.
529 if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
530 return false;
531 IsNotTrivial = true; // Can't be mem2reg'd.
532 HadNonMemTransferAccess = true;
533 continue;
534 }
535
536 // If this is a constant sized memset of a constant value (e.g. 0) we can
537 // handle it.
538 if (MemSetInst *MSI = dyn_cast<MemSetInst>(UI)) {
539 // Store to dynamic index.
540 if (NonConstantIdx)
541 return false;
542 // Store of constant value.
543 if (!isa<ConstantInt>(MSI->getValue()))
544 return false;
545
546 // Store of constant size.
547 ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
548 if (!Len)
549 return false;
550
551 // If the size differs from the alloca, we can only convert the alloca to
552 // an integer bag-of-bits.
553 // FIXME: This should handle all of the cases that are currently accepted
554 // as vector element insertions.
555 if (Len->getZExtValue() != AllocaSize || Offset != 0)
556 ScalarKind = Integer;
557
558 IsNotTrivial = true; // Can't be mem2reg'd.
559 HadNonMemTransferAccess = true;
560 continue;
561 }
562
563 // If this is a memcpy or memmove into or out of the whole allocation, we
564 // can handle it like a load or store of the scalar type.
565 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(UI)) {
566 // Store to dynamic index.
567 if (NonConstantIdx)
568 return false;
569 ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
570 if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0)
571 return false;
572
573 IsNotTrivial = true; // Can't be mem2reg'd.
574 continue;
575 }
576
577 // If this is a lifetime intrinsic, we can handle it.
578 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI)) {
579 if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
580 II->getIntrinsicID() == Intrinsic::lifetime_end) {
581 continue;
582 }
583 }
584
585 // Otherwise, we cannot handle this!
586 return false;
587 }
588
589 return true;
590 }
591
592 /// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
593 /// directly. This happens when we are converting an "integer union" to a
594 /// single integer scalar, or when we are converting a "vector union" to a
595 /// vector with insert/extractelement instructions.
596 ///
597 /// Offset is an offset from the original alloca, in bits that need to be
598 /// shifted to the right. By the end of this, there should be no uses of Ptr.
599 void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
600 uint64_t Offset,
601 Value* NonConstantIdx) {
602 while (!Ptr->use_empty()) {
603 Instruction *User = cast<Instruction>(Ptr->user_back());
604
605 if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
606 ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
607 CI->eraseFromParent();
608 continue;
609 }
610
611 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
612 // Compute the offset that this GEP adds to the pointer.
613 SmallVector Indices(GEP->op_begin()+1, GEP->op_end());
614 Value* GEPNonConstantIdx = nullptr;
615 if (!GEP->hasAllConstantIndices()) {
616 assert(!NonConstantIdx &&
617 "Dynamic GEP reading from dynamic GEP unsupported");
618 GEPNonConstantIdx = Indices.pop_back_val();
619 } else
620 GEPNonConstantIdx = NonConstantIdx;
621 uint64_t GEPOffset = DL.getIndexedOffsetInType(GEP->getSourceElementType(),
622 Indices);
623 ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
624 GEP->eraseFromParent();
625 continue;
626 }
627
628 IRBuilder<> Builder(User);
629
630 if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
631 // The load is a bit extract from NewAI shifted right by Offset bits.
632 Value *LoadedVal = Builder.CreateLoad(NewAI);
633 Value *NewLoadVal
634 = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
635 NonConstantIdx, Builder);
636 LI->replaceAllUsesWith(NewLoadVal);
637 LI->eraseFromParent();
638 continue;
639 }
640
641 if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
642 assert(SI->getOperand(0) != Ptr && "Consistency error!");
643 Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
644 Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
645 NonConstantIdx, Builder);
646 Builder.CreateStore(New, NewAI);
647 SI->eraseFromParent();
648
649 // If the load we just inserted is now dead, then the inserted store
650 // overwrote the entire thing.
651 if (Old->use_empty())
652 Old->eraseFromParent();
653 continue;
654 }
655
656 // If this is a constant sized memset of a constant value (e.g. 0) we can
657 // transform it into a store of the expanded constant value.
658 if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
659 assert(MSI->getRawDest() == Ptr && "Consistency error!");
660 assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
661 int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
662 if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
663 unsigned NumBytes = static_cast<unsigned>(SNumBytes);
664 unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
665
666 // Compute the value replicated the right number of times.
667 APInt APVal(NumBytes*8, Val);
668
669 // Splat the value if non-zero.
670 if (Val)
671 for (unsigned i = 1; i != NumBytes; ++i)
672 APVal |= APVal << 8;
673
674 Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
675 Value *New = ConvertScalar_InsertValue(
676 ConstantInt::get(User->getContext(), APVal),
677 Old, Offset, nullptr, Builder);
678 Builder.CreateStore(New, NewAI);
679
680 // If the load we just inserted is now dead, then the memset overwrote
681 // the entire thing.
682 if (Old->use_empty())
683 Old->eraseFromParent();
684 }
685 MSI->eraseFromParent();
686 continue;
687 }
688
689 // If this is a memcpy or memmove into or out of the whole allocation, we
690 // can handle it like a load or store of the scalar type.
691 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
692 assert(Offset == 0 && "must be store to start of alloca");
693 assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");
694
695 // If the source and destination are both to the same alloca, then this is
696 // a noop copy-to-self, just delete it. Otherwise, emit a load and store
697 // as appropriate.
698 AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0));
699
700 if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) {
701 // Dest must be OrigAI, change this to be a load from the original
702 // pointer (bitcasted), then a store to our new alloca.
703 assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
704 Value *SrcPtr = MTI->getSource();
705 PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
706 PointerType* AIPTy = cast<PointerType>(NewAI->getType());
707 if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
708 AIPTy = PointerType::get(NewAI->getAllocatedType(),
709 SPTy->getAddressSpace());
710 }
711 SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
712
713 LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
714 SrcVal->setAlignment(MTI->getAlignment());
715 Builder.CreateStore(SrcVal, NewAI);
716 } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
717 // Src must be OrigAI, change this to be a load from NewAI then a store
718 // through the original dest pointer (bitcasted).
719 assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
720 LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
721
722 PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
723 PointerType* AIPTy = cast<PointerType>(NewAI->getType());
724 if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
725 AIPTy = PointerType::get(NewAI->getAllocatedType(),
726 DPTy->getAddressSpace());
727 }
728 Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
729
730 StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
731 NewStore->setAlignment(MTI->getAlignment());
732 } else {
733 // Noop transfer. Src == Dst
734 }
735
736 MTI->eraseFromParent();
737 continue;
738 }
739
740 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
741 if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
742 II->getIntrinsicID() == Intrinsic::lifetime_end) {
743 // There's no need to preserve these, as the resulting alloca will be
744 // converted to a register anyways.
745 II->eraseFromParent();
746 continue;
747 }
748 }
749
750 llvm_unreachable("Unsupported operation!");
751 }
752 }
753
754 /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
755 /// or vector value FromVal, extracting the bits from the offset specified by
756 /// Offset. This returns the value, which is of type ToType.
757 ///
758 /// This happens when we are converting an "integer union" to a single
759 /// integer scalar, or when we are converting a "vector union" to a vector with
760 /// insert/extractelement instructions.
761 ///
762 /// Offset is an offset from the original alloca, in bits that need to be
763 /// shifted to the right.
764 Value *ConvertToScalarInfo::
765 ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
766 uint64_t Offset, Value* NonConstantIdx,
767 IRBuilder<> &Builder) {
768 // If the load is of the whole new alloca, no conversion is needed.
769 Type *FromType = FromVal->getType();
770 if (FromType == ToType && Offset == 0)
771 return FromVal;
772
773 // If the result alloca is a vector type, this is either an element
774 // access or a bitcast to another vector type of the same size.
775 if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
776 unsigned FromTypeSize = DL.getTypeAllocSize(FromType);
777 unsigned ToTypeSize = DL.getTypeAllocSize(ToType);
778 if (FromTypeSize == ToTypeSize)
779 return Builder.CreateBitCast(FromVal, ToType);
780
781 // Otherwise it must be an element access.
782 unsigned Elt = 0;
783 if (Offset) {
784 unsigned EltSize = DL.getTypeAllocSizeInBits(VTy->getElementType());
785 Elt = Offset/EltSize;
786 assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
787 }
788 // Return the element extracted out of it.
789 Value *Idx;
790 if (NonConstantIdx) {
791 if (Elt)
792 Idx = Builder.CreateAdd(NonConstantIdx,
793 Builder.getInt32(Elt),
794 "dyn.offset");
795 else
796 Idx = NonConstantIdx;
797 } else
798 Idx = Builder.getInt32(Elt);
799 Value *V = Builder.CreateExtractElement(FromVal, Idx);
800 if (V->getType() != ToType)
801 V = Builder.CreateBitCast(V, ToType);
802 return V;
803 }
804
805 // If ToType is a first class aggregate, extract out each of the pieces and
806 // use insertvalue's to form the FCA.
807 if (StructType *ST = dyn_cast<StructType>(ToType)) {
808 assert(!NonConstantIdx &&
809 "Dynamic indexing into struct types not supported");
810 const StructLayout &Layout = *DL.getStructLayout(ST);
811 Value *Res = UndefValue::get(ST);
812 for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
813 Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
814 Offset+Layout.getElementOffsetInBits(i),
815 nullptr, Builder);
816 Res = Builder.CreateInsertValue(Res, Elt, i);
817 }
818 return Res;
819 }
820
821 if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
822 assert(!NonConstantIdx &&
823 "Dynamic indexing into array types not supported");
824 uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
825 Value *Res = UndefValue::get(AT);
826 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
827 Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
828 Offset+i*EltSize, nullptr,
829 Builder);
830 Res = Builder.CreateInsertValue(Res, Elt, i);
831 }
832 return Res;
833 }
834
835 // Otherwise, this must be a union that was converted to an integer value.
836 IntegerType *NTy = cast<IntegerType>(FromVal->getType());
837
838 // If this is a big-endian system and the load is narrower than the
839 // full alloca type, we need to do a shift to get the right bits.
840 int ShAmt = 0;
841 if (DL.isBigEndian()) {
842 // On big-endian machines, the lowest bit is stored at the bit offset
843 // from the pointer given by getTypeStoreSizeInBits. This matters for
844 // integers with a bitwidth that is not a multiple of 8.
845 ShAmt = DL.getTypeStoreSizeInBits(NTy) -
846 DL.getTypeStoreSizeInBits(ToType) - Offset;
847 } else {
848 ShAmt = Offset;
849 }
850
851 // Note: we support negative bitwidths (with shl) which are not defined.
852 // We do this to support (f.e.) loads off the end of a structure where
853 // only some bits are used.
854 if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
855 FromVal = Builder.CreateLShr(FromVal,
856 ConstantInt::get(FromVal->getType(), ShAmt));
857 else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
858 FromVal = Builder.CreateShl(FromVal,
859 ConstantInt::get(FromVal->getType(), -ShAmt));
860
861 // Finally, unconditionally truncate the integer to the right width.
862 unsigned LIBitWidth = DL.getTypeSizeInBits(ToType);
863 if (LIBitWidth < NTy->getBitWidth())
864 FromVal =
865 Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
866 LIBitWidth));
867 else if (LIBitWidth > NTy->getBitWidth())
868 FromVal =
869 Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
870 LIBitWidth));
871
872 // If the result is an integer, this is a trunc or bitcast.
873 if (ToType->isIntegerTy()) {
874 // Should be done.
875 } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
876 // Just do a bitcast, we know the sizes match up.
877 FromVal = Builder.CreateBitCast(FromVal, ToType);
878 } else {
879 // Otherwise must be a pointer.
880 FromVal = Builder.CreateIntToPtr(FromVal, ToType);
881 }
882 assert(FromVal->getType() == ToType && "Didn't convert right?");
883 return FromVal;
884 }
885
886 /// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
887 /// or vector value "Old" at the offset specified by Offset.
888 ///
889 /// This happens when we are converting an "integer union" to a
890 /// single integer scalar, or when we are converting a "vector union" to a
891 /// vector with insert/extractelement instructions.
892 ///
893 /// Offset is an offset from the original alloca, in bits that need to be
894 /// shifted to the right.
895 ///
896 /// NonConstantIdx is an index value if there was a GEP with a non-constant
897 /// index value. If this is 0 then all GEPs used to find this insert address
898 /// are constant.
899 Value *ConvertToScalarInfo::
900 ConvertScalar_InsertValue(Value *SV, Value *Old,
901 uint64_t Offset, Value* NonConstantIdx,
902 IRBuilder<> &Builder) {
903 // Convert the stored type to the actual type, shift it left to insert
904 // then 'or' into place.
905 Type *AllocaType = Old->getType();
906 LLVMContext &Context = Old->getContext();
907
908 if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
909 uint64_t VecSize = DL.getTypeAllocSizeInBits(VTy);
910 uint64_t ValSize = DL.getTypeAllocSizeInBits(SV->getType());
911
912 // Changing the whole vector with memset or with an access of a different
913 // vector type?
914 if (ValSize == VecSize)
915 return Builder.CreateBitCast(SV, AllocaType);
916
917 // Must be an element insertion.
918 Type *EltTy = VTy->getElementType();
919 if (SV->getType() != EltTy)
920 SV = Builder.CreateBitCast(SV, EltTy);
921 uint64_t EltSize = DL.getTypeAllocSizeInBits(EltTy);
922 unsigned Elt = Offset/EltSize;
923 Value *Idx;
924 if (NonConstantIdx) {
925 if (Elt)
926 Idx = Builder.CreateAdd(NonConstantIdx,
927 Builder.getInt32(Elt),
928 "dyn.offset");
929 else
930 Idx = NonConstantIdx;
931 } else
932 Idx = Builder.getInt32(Elt);
933 return Builder.CreateInsertElement(Old, SV, Idx);
934 }
935
936 // If SV is a first-class aggregate value, insert each value recursively.
937 if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
938 assert(!NonConstantIdx &&
939 "Dynamic indexing into struct types not supported");
940 const StructLayout &Layout = *DL.getStructLayout(ST);
941 for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
942 Value *Elt = Builder.CreateExtractValue(SV, i);
943 Old = ConvertScalar_InsertValue(Elt, Old,
944 Offset+Layout.getElementOffsetInBits(i),
945 nullptr, Builder);
946 }
947 return Old;
948 }
949
950 if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
951 assert(!NonConstantIdx &&
952 "Dynamic indexing into array types not supported");
953 uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
954 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
955 Value *Elt = Builder.CreateExtractValue(SV, i);
956 Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr,
957 Builder);
958 }
959 return Old;
960 }
961
962 // If SV is a float, convert it to the appropriate integer type.
963 // If it is a pointer, do the same.
964 unsigned SrcWidth = DL.getTypeSizeInBits(SV->getType());
965 unsigned DestWidth = DL.getTypeSizeInBits(AllocaType);
966 unsigned SrcStoreWidth = DL.getTypeStoreSizeInBits(SV->getType());
967 unsigned DestStoreWidth = DL.getTypeStoreSizeInBits(AllocaType);
968 if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
969 SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
970 else if (SV->getType()->isPointerTy())
971 SV = Builder.CreatePtrToInt(SV, DL.getIntPtrType(SV->getType()));
972
973 // Zero extend or truncate the value if needed.
974 if (SV->getType() != AllocaType) {
975 if (SV->getType()->getPrimitiveSizeInBits() <
976 AllocaType->getPrimitiveSizeInBits())
977 SV = Builder.CreateZExt(SV, AllocaType);
978 else {
979 // Truncation may be needed if storing more than the alloca can hold
980 // (undefined behavior).
981 SV = Builder.CreateTrunc(SV, AllocaType);
982 SrcWidth = DestWidth;
983 SrcStoreWidth = DestStoreWidth;
984 }
985 }
986
987 // If this is a big-endian system and the store is narrower than the
988 // full alloca type, we need to do a shift to get the right bits.
989 int ShAmt = 0;
990 if (DL.isBigEndian()) {
991 // On big-endian machines, the lowest bit is stored at the bit offset
992 // from the pointer given by getTypeStoreSizeInBits. This matters for
993 // integers with a bitwidth that is not a multiple of 8.
994 ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
995 } else {
996 ShAmt = Offset;
997 }
998
999 // Note: we support negative bitwidths (with shr) which are not defined.
1000 // We do this to support (f.e.) stores off the end of a structure where
1001 // only some bits in the structure are set.
1002 APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
1003 if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
1004 SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
1005 Mask <<= ShAmt;
1006 } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
1007 SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
1008 Mask = Mask.lshr(-ShAmt);
1009 }
1010
1011 // Mask out the bits we are about to insert from the old value, and or
1012 // in the new bits.
1013 if (SrcWidth != DestWidth) {
1014 assert(DestWidth > SrcWidth);
1015 Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
1016 SV = Builder.CreateOr(Old, SV, "ins");
1017 }
1018 return SV;
1019 }
1020
1021
1022 //===----------------------------------------------------------------------===//
1023 // SRoA Driver
1024 //===----------------------------------------------------------------------===//
1025
1026
1027 bool SROA::runOnFunction(Function &F) {
1028 if (skipFunction(F))
1029 return false;
1030
1031 bool Changed = performPromotion(F);
1032
1033 while (1) {
1034 bool LocalChange = performScalarRepl(F);
1035 if (!LocalChange) break; // No need to repromote if no scalarrepl
1036 Changed = true;
1037 LocalChange = performPromotion(F);
1038 if (!LocalChange) break; // No need to re-scalarrepl if no promotion
1039 }
1040
1041 return Changed;
1042 }
1043
1044 namespace {
1045 class AllocaPromoter : public LoadAndStorePromoter {
1046 AllocaInst *AI;
1047 DIBuilder *DIB;
1048 SmallVector<DbgDeclareInst *, 4> DDIs;
1049 SmallVector<DbgValueInst *, 4> DVIs;
1050 public:
1051 AllocaPromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1052 DIBuilder *DB)
1053 : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}
1054
1055 void run(AllocaInst *AI, const SmallVectorImpl<Instruction *> &Insts) {
1056 // Remember which alloca we're promoting (for isInstInList).
1057 this->AI = AI;
1058 if (auto *L = LocalAsMetadata::getIfExists(AI)) {
1059 if (auto *DINode = MetadataAsValue::getIfExists(AI->getContext(), L)) {
1060 for (User *U : DINode->users())
1061 if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
1062 DDIs.push_back(DDI);
1063 else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
1064 DVIs.push_back(DVI);
1065 }
1066 }
1067
1068 LoadAndStorePromoter::run(Insts);
1069 AI->eraseFromParent();
1070 for (SmallVectorImpl<DbgDeclareInst *>::iterator I = DDIs.begin(),
1071 E = DDIs.end(); I != E; ++I) {
1072 DbgDeclareInst *DDI = *I;
1073 DDI->eraseFromParent();
1074 }
1075 for (SmallVectorImpl<DbgValueInst *>::iterator I = DVIs.begin(),
1076 E = DVIs.end(); I != E; ++I) {
1077 DbgValueInst *DVI = *I;
1078 DVI->eraseFromParent();
1079 }
1080 }
1081
1082 bool isInstInList(Instruction *I,
1083 const SmallVectorImpl<Instruction *> &Insts) const override {
1084 if (LoadInst *LI = dyn_cast<LoadInst>(I))
1085 return LI->getOperand(0) == AI;
1086 return cast<StoreInst>(I)->getPointerOperand() == AI;
1087 }
1088
1089 void updateDebugInfo(Instruction *Inst) const override {
1090 for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
1091 E = DDIs.end(); I != E; ++I) {
1092 DbgDeclareInst *DDI = *I;
1093 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
1094 ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
1095 else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
1096 ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
1097 }
1098 for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
1099 E = DVIs.end(); I != E; ++I) {
1100 DbgValueInst *DVI = *I;
1101 Value *Arg = nullptr;
1102 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1103 // If an argument is zero extended then use argument directly. The ZExt
1104 // may be zapped by an optimization pass in future.
1105 if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
1106 Arg = dyn_cast<Argument>(ZExt->getOperand(0));
1107 if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
1108 Arg = dyn_cast<Argument>(SExt->getOperand(0));
1109 if (!Arg)
1110 Arg = SI->getOperand(0);
1111 } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
1112 Arg = LI->getOperand(0);
1113 } else {
1114 continue;
1115 }
1116 DIB->insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
1117 DVI->getExpression(), DVI->getDebugLoc(),
1118 Inst);
1119 }
1120 }
1121 };
1122 } // end anon namespace
1123
1124 /// isSafeSelectToSpeculate - Select instructions that use an alloca and are
1125 /// subsequently loaded can be rewritten to load both input pointers and then
1126 /// select between the result, allowing the load of the alloca to be promoted.
1127 /// From this:
1128 /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
1129 /// %V = load i32* %P2
1130 /// to:
1131 /// %V1 = load i32* %Alloca -> will be mem2reg'd
1132 /// %V2 = load i32* %Other
1133 /// %V = select i1 %cond, i32 %V1, i32 %V2
1134 ///
1135 /// We can do this to a select if its only uses are loads and if the operand to
1136 /// the select can be loaded unconditionally.
1137 static bool isSafeSelectToSpeculate(SelectInst *SI) {
1138 const DataLayout &DL = SI->getModule()->getDataLayout();
1139
1140 for (User *U : SI->users()) {
1141 LoadInst *LI = dyn_cast<LoadInst>(U);
1142 if (!LI || !LI->isSimple()) return false;
1143
1144 // Both operands to the select need to be dereferencable, either absolutely
1145 // (e.g. allocas) or at this point because we can see other accesses to it.
1146 if (!isSafeToLoadUnconditionally(SI->getTrueValue(), LI->getAlignment(),
1147 DL, LI))
1148 return false;
1149 if (!isSafeToLoadUnconditionally(SI->getFalseValue(), LI->getAlignment(),
1150 DL, LI))
1151 return false;
1152 }
1153
1154 return true;
1155 }
1156
1157 /// isSafePHIToSpeculate - PHI instructions that use an alloca and are
1158 /// subsequently loaded can be rewritten to load both input pointers in the pred
1159 /// blocks and then PHI the results, allowing the load of the alloca to be
1160 /// promoted.
1161 /// From this:
1162 /// %P2 = phi [i32* %Alloca, i32* %Other]
1163 /// %V = load i32* %P2
1164 /// to:
1165 /// %V1 = load i32* %Alloca -> will be mem2reg'd
1166 /// ...
1167 /// %V2 = load i32* %Other
1168 /// ...
1169 /// %V = phi [i32 %V1, i32 %V2]
1170 ///
1171 /// We can do this to a select if its only uses are loads and if the operand to
1172 /// the select can be loaded unconditionally.
1173 static bool isSafePHIToSpeculate(PHINode *PN) {
1174 // For now, we can only do this promotion if the load is in the same block as
1175 // the PHI, and if there are no stores between the phi and load.
1176 // TODO: Allow recursive phi users.
1177 // TODO: Allow stores.
1178 BasicBlock *BB = PN->getParent();
1179 unsigned MaxAlign = 0;
1180 for (User *U : PN->users()) {
1181 LoadInst *LI = dyn_cast<LoadInst>(U);
1182 if (!LI || !LI->isSimple()) return false;
1183
1184 // For now we only allow loads in the same block as the PHI. This is a
1185 // common case that happens when instcombine merges two loads through a PHI.
1186 if (LI->getParent() != BB) return false;
1187
1188 // Ensure that there are no instructions between the PHI and the load that
1189 // could store.
1190 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1191 if (BBI->mayWriteToMemory())
1192 return false;
1193
1194 MaxAlign = std::max(MaxAlign, LI->getAlignment());
1195 }
1196
1197 const DataLayout &DL = PN->getModule()->getDataLayout();
1198
1199 // Okay, we know that we have one or more loads in the same block as the PHI.
1200 // We can transform this if it is safe to push the loads into the predecessor
1201 // blocks. The only thing to watch out for is that we can't put a possibly
1202 // trapping load in the predecessor if it is a critical edge.
1203 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1204 BasicBlock *Pred = PN->getIncomingBlock(i);
1205 Value *InVal = PN->getIncomingValue(i);
1206
1207 // If the terminator of the predecessor has side-effects (an invoke),
1208 // there is no safe place to put a load in the predecessor.
1209 if (Pred->getTerminator()->mayHaveSideEffects())
1210 return false;
1211
1212 // If the value is produced by the terminator of the predecessor
1213 // (an invoke), there is no valid place to put a load in the predecessor.
1214 if (Pred->getTerminator() == InVal)
1215 return false;
1216
1217 // If the predecessor has a single successor, then the edge isn't critical.
1218 if (Pred->getTerminator()->getNumSuccessors() == 1)
1219 continue;
1220
1221 // If this pointer is always safe to load, or if we can prove that there is
1222 // already a load in the block, then we can move the load to the pred block.
1223 if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, Pred->getTerminator()))
1224 continue;
1225
1226 return false;
1227 }
1228
1229 return true;
1230 }
1231
1232
1233 /// tryToMakeAllocaBePromotable - This returns true if the alloca only has
1234 /// direct (non-volatile) loads and stores to it. If the alloca is close but
1235 /// not quite there, this will transform the code to allow promotion. As such,
1236 /// it is a non-pure predicate.
1237 static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) {
1238 SetVector<Instruction*, SmallVector<Instruction*, 4>,
1239 SmallPtrSet<Instruction*, 4> > InstsToRewrite;
1240 for (User *U : AI->users()) {
1241 if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
1242 if (!LI->isSimple())
1243 return false;
1244 continue;
1245 }
1246
1247 if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1248 if (SI->getOperand(0) == AI || !SI->isSimple())
1249 return false; // Don't allow a store OF the AI, only INTO the AI.
1250 continue;
1251 }
1252
1253 if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
1254 // If the condition being selected on is a constant, fold the select, yes
1255 // this does (rarely) happen early on.
1256 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
1257 Value *Result = SI->getOperand(1+CI->isZero());
1258 SI->replaceAllUsesWith(Result);
1259 SI->eraseFromParent();
1260
1261 // This is very rare and we just scrambled the use list of AI, start
1262 // over completely.
1263 return tryToMakeAllocaBePromotable(AI, DL);
1264 }
1265
1266 // If it is safe to turn "load (select c, AI, ptr)" into a select of two
1267 // loads, then we can transform this by rewriting the select.
1268 if (!isSafeSelectToSpeculate(SI))
1269 return false;
1270
1271 InstsToRewrite.insert(SI);
1272 continue;
1273 }
1274
1275 if (PHINode *PN = dyn_cast<PHINode>(U)) {
1276 if (PN->use_empty()) { // Dead PHIs can be stripped.
1277 InstsToRewrite.insert(PN);
1278 continue;
1279 }
1280
1281 // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
1282 // in the pred blocks, then we can transform this by rewriting the PHI.
1283 if (!isSafePHIToSpeculate(PN))
1284 return false;
1285
1286 InstsToRewrite.insert(PN);
1287 continue;
1288 }
1289
1290 if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
1291 if (onlyUsedByLifetimeMarkers(BCI)) {
1292 InstsToRewrite.insert(BCI);
1293 continue;
1294 }
1295 }
1296
1297 return false;
1298 }
1299
1300 // If there are no instructions to rewrite, then all uses are load/stores and
1301 // we're done!
1302 if (InstsToRewrite.empty())
1303 return true;
1304
1306 // If we have instructions that need to be rewritten for this to be promotable,
1307 // take care of that now.
1307 for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
1308 if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
1309 // This could only be a bitcast used by nothing but lifetime intrinsics.
1310 for (BitCastInst::user_iterator I = BCI->user_begin(), E = BCI->user_end();
1311 I != E;)
1312 cast<Instruction>(*I++)->eraseFromParent();
1313 BCI->eraseFromParent();
1314 continue;
1315 }
1316
1317 if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
1318 // Selects in InstsToRewrite only have load uses. Rewrite each as two
1319 // loads with a new select.
1320 while (!SI->use_empty()) {
1321 LoadInst *LI = cast<LoadInst>(SI->user_back());
1322
1323 IRBuilder<> Builder(LI);
1324 LoadInst *TrueLoad =
1325 Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
1326 LoadInst *FalseLoad =
1327 Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
1328
1329 // Transfer alignment and AA info if present.
1330 TrueLoad->setAlignment(LI->getAlignment());
1331 FalseLoad->setAlignment(LI->getAlignment());
1332
1333 AAMDNodes Tags;
1334 LI->getAAMetadata(Tags);
1335 if (Tags) {
1336 TrueLoad->setAAMetadata(Tags);
1337 FalseLoad->setAAMetadata(Tags);
1338 }
1339
1340 Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
1341 V->takeName(LI);
1342 LI->replaceAllUsesWith(V);
1343 LI->eraseFromParent();
1344 }
1345
1346 // Now that all the loads are gone, the select is gone too.
1347 SI->eraseFromParent();
1348 continue;
1349 }
1350
1351 // Otherwise, we have a PHI node which allows us to push the loads into the
1352 // predecessors.
1353 PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
1354 if (PN->use_empty()) {
1355 PN->eraseFromParent();
1356 continue;
1357 }
1358
1359 Type *LoadTy = AI->getAllocatedType();
1360 PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
1361 PN->getName()+".ld", PN);
1362
1363 // Get the AA tags and alignment to use from one of the loads. It doesn't
1364 // matter which one we get and if any differ, it doesn't matter.
1365 LoadInst *SomeLoad = cast<LoadInst>(PN->user_back());
1366
1367 AAMDNodes AATags;
1368 SomeLoad->getAAMetadata(AATags);
1369 unsigned Align = SomeLoad->getAlignment();
1370
1371 // Rewrite all loads of the PN to use the new PHI.
1372 while (!PN->use_empty()) {
1373 LoadInst *LI = cast<LoadInst>(PN->user_back());
1374 LI->replaceAllUsesWith(NewPN);
1375 LI->eraseFromParent();
1376 }
1377
1378 // Inject loads into all of the pred blocks. Keep track of which blocks we
1379 // insert them into in case we have multiple edges from the same block.
1380 DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
1381
1382 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1383 BasicBlock *Pred = PN->getIncomingBlock(i);
1384 LoadInst *&Load = InsertedLoads[Pred];
1385 if (!Load) {
1386 Load = new LoadInst(PN->getIncomingValue(i),
1387 PN->getName() + "." + Pred->getName(),
1388 Pred->getTerminator());
1389 Load->setAlignment(Align);
1390 if (AATags) Load->setAAMetadata(AATags);
1391 }
1392
1393 NewPN->addIncoming(Load, Pred);
1394 }
1395
1396 PN->eraseFromParent();
1397 }
1398
1399 ++NumAdjusted;
1400 return true;
1401 }
1402
1403 bool SROA::performPromotion(Function &F) {
1404 std::vector<AllocaInst*> Allocas;
1405 const DataLayout &DL = F.getParent()->getDataLayout();
1406 DominatorTree *DT = nullptr;
1407 if (HasDomTree)
1408 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1409 AssumptionCache &AC =
1410 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1411
1412 BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
1413 DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
1414 bool Changed = false;
1415 SmallVector<Instruction*, 64> Insts;
1416 while (1) {
1417 Allocas.clear();
1418
1419 // Find allocas that are safe to promote, by looking at all instructions in
1420 // the entry node
1421 for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
1422 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
1423 if (tryToMakeAllocaBePromotable(AI, DL))
1424 Allocas.push_back(AI);
1425
1426 if (Allocas.empty()) break;
1427
1428 if (HasDomTree)
1429 PromoteMemToReg(Allocas, *DT, nullptr, &AC);
1430 else {
1431 SSAUpdater SSA;
1432 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
1433 AllocaInst *AI = Allocas[i];
1434
1435 // Build list of instructions to promote.
1436 for (User *U : AI->users())
1437 Insts.push_back(cast<Instruction>(U));
1438 AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
1439 Insts.clear();
1440 }
1441 }
1442 NumPromoted += Allocas.size();
1443 Changed = true;
1444 }
1445
1446 return Changed;
1447 }
1448
1449
1450 /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
1451 /// SROA. It must be a struct or array type with a small number of elements.
1452 bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
1453 Type *T = AI->getAllocatedType();
1454 // Do not promote any struct that has too many members.
1455 if (StructType *ST = dyn_cast<StructType>(T))
1456 return ST->getNumElements() <= StructMemberThreshold;
1457 // Do not promote any array that has too many elements.
1458 if (ArrayType *AT = dyn_cast<ArrayType>(T))
1459 return AT->getNumElements() <= ArrayElementThreshold;
1460 return false;
1461 }
1462
1463 // performScalarRepl - This is a simple worklist-driven algorithm that runs on
1464 // all of the alloca instructions in the entry block, splitting up the ones
1465 // whose uses can all be rewritten in terms of their elements.
1466 //
1467 bool SROA::performScalarRepl(Function &F) {
1468 std::vector<AllocaInst*> WorkList;
1469 const DataLayout &DL = F.getParent()->getDataLayout();
1470
1471 // Scan the entry basic block, adding allocas to the worklist.
1472 BasicBlock &BB = F.getEntryBlock();
1473 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
1474 if (AllocaInst *A = dyn_cast<AllocaInst>(I))
1475 WorkList.push_back(A);
1476
1477 // Process the worklist
1478 bool Changed = false;
1479 while (!WorkList.empty()) {
1480 AllocaInst *AI = WorkList.back();
1481 WorkList.pop_back();
1482
1483 // Handle dead allocas trivially. These can be formed by SROA'ing arrays
1484 // with unused elements.
1485 if (AI->use_empty()) {
1486 AI->eraseFromParent();
1487 Changed = true;
1488 continue;
1489 }
1490
1491 // If this alloca is impossible for us to promote, reject it early.
1492 if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
1493 continue;
1494
1495 // Check to see if we can perform the core SROA transformation. We cannot
1496 // transform the allocation instruction if it is an array allocation
1497 // (allocations OF arrays are ok though), and an allocation of a scalar
1498 // value cannot be decomposed at all.
1499 uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
1500
1501 // Do not promote [0 x %struct].
1502 if (AllocaSize == 0) continue;
1503
1504 // Do not promote any struct whose size is too big.
1505 if (AllocaSize > SRThreshold) continue;
1506
1507 // If the alloca looks like a good candidate for scalar replacement, and if
1508 // all its users can be transformed, then split up the aggregate into its
1509 // separate elements.
1510 if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
1511 DoScalarReplacement(AI, WorkList);
1512 Changed = true;
1513 continue;
1514 }
1515
1516 // See if we can turn this aggregate value (potentially with casts) into a
1517 // simple scalar value that can be mem2reg'd into a register value.
1518 // IsNotTrivial tracks whether this is something that mem2reg could have
1519 // promoted itself. If so, we don't want to transform it needlessly. Note
1520 // that we can't just check based on the type: the alloca may be of an i32
1521 // but that has pointer arithmetic to set byte 3 of it or something.
1522 if (AllocaInst *NewAI =
1523 ConvertToScalarInfo((unsigned)AllocaSize, DL, ScalarLoadThreshold)
1524 .TryConvert(AI)) {
1525 NewAI->takeName(AI);
1526 AI->eraseFromParent();
1527 ++NumConverted;
1528 Changed = true;
1529 continue;
1530 }
1531
1532 // Otherwise, couldn't process this alloca.
1533 }
1534
1535 return Changed;
1536 }
1537
1538 /// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
1539 /// predicate, do SROA now.
1540 void SROA::DoScalarReplacement(AllocaInst *AI,
1541 std::vector<AllocaInst*> &WorkList) {
1542 DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
1543 SmallVector<AllocaInst*, 32> ElementAllocas;
1544 if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
1545 ElementAllocas.reserve(ST->getNumContainedTypes());
1546 for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
1547 AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr,
1548 AI->getAlignment(),
1549 AI->getName() + "." + Twine(i), AI);
1550 ElementAllocas.push_back(NA);
1551 WorkList.push_back(NA); // Add to worklist for recursive processing
1552 }
1553 } else {
1554 ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
1555 ElementAllocas.reserve(AT->getNumElements());
1556 Type *ElTy = AT->getElementType();
1557 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
1558 AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(),
1559 AI->getName() + "." + Twine(i), AI);
1560 ElementAllocas.push_back(NA);
1561 WorkList.push_back(NA); // Add to worklist for recursive processing
1562 }
1563 }
1564
1565 // Now that we have created the new alloca instructions, rewrite all the
1566 // uses of the old alloca.
1567 RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
1568
1569 // Now erase any instructions that were made dead while rewriting the alloca.
1570 DeleteDeadInstructions();
1571 AI->eraseFromParent();
1572
1573 ++NumReplaced;
1574 }
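For illustration only (this example is not part of the original source), a minimal LLVM IR sketch of what DoScalarReplacement does to a struct alloca whose uses all go through element GEPs:

; Before: one aggregate alloca, accessed only through element GEPs.
define i32 @example_before() {
  %agg = alloca { i32, float }
  %p0 = getelementptr { i32, float }, { i32, float }* %agg, i32 0, i32 0
  store i32 7, i32* %p0
  %v = load i32, i32* %p0
  ret i32 %v
}

; After: one alloca per member; later promotion turns these into SSA values.
define i32 @example_after() {
  %agg.0 = alloca i32
  %agg.1 = alloca float
  store i32 7, i32* %agg.0
  %v = load i32, i32* %agg.0
  ret i32 %v
}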
1575
1576 /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
1577 /// recursively including all their operands that become trivially dead.
1578 void SROA::DeleteDeadInstructions() {
1579 while (!DeadInsts.empty()) {
1580 Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
1581
1582 for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
1583 if (Instruction *U = dyn_cast<Instruction>(*OI)) {
1584 // Zero out the operand and see if it becomes trivially dead.
1585 // (But, don't add allocas to the dead instruction list -- they are
1586 // already on the worklist and will be deleted separately.)
1587 *OI = nullptr;
1588 if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
1589 DeadInsts.push_back(U);
1590 }
1591
1592 I->eraseFromParent();
1593 }
1594 }
1595
1596 /// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
1597 /// performing scalar replacement of alloca AI. The results are flagged in
1598 /// the Info parameter. Offset indicates the position within AI that is
1599 /// referenced by this instruction.
1600 void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
1601 AllocaInfo &Info) {
1602 const DataLayout &DL = I->getModule()->getDataLayout();
1603 for (Use &U : I->uses()) {
1604 Instruction *User = cast(U.getUser());
1605
1606 if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1607 isSafeForScalarRepl(BC, Offset, Info);
1608 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1609 uint64_t GEPOffset = Offset;
1610 isSafeGEP(GEPI, GEPOffset, Info);
1611 if (!Info.isUnsafe)
1612 isSafeForScalarRepl(GEPI, GEPOffset, Info);
1613 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
1614 ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
1615 if (!Length || Length->isNegative())
1616 return MarkUnsafe(Info, User);
1617
1618 isSafeMemAccess(Offset, Length->getZExtValue(), nullptr,
1619 U.getOperandNo() == 0, Info, MI,
1620 true /*AllowWholeAccess*/);
1621 } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1622 if (!LI->isSimple())
1623 return MarkUnsafe(Info, User);
1624 Type *LIType = LI->getType();
1625 isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
1626 LI, true /*AllowWholeAccess*/);
1627 Info.hasALoadOrStore = true;
1628
1629 } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1630 // Store is ok if storing INTO the pointer, not storing the pointer
1631 if (!SI->isSimple() || SI->getOperand(0) == I)
1632 return MarkUnsafe(Info, User);
1633
1634 Type *SIType = SI->getOperand(0)->getType();
1635 isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
1636 SI, true /*AllowWholeAccess*/);
1637 Info.hasALoadOrStore = true;
1638 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
1639 if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
1640 II->getIntrinsicID() != Intrinsic::lifetime_end)
1641 return MarkUnsafe(Info, User);
1642 } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
1643 isSafePHISelectUseForScalarRepl(User, Offset, Info);
1644 } else {
1645 return MarkUnsafe(Info, User);
1646 }
1647 if (Info.isUnsafe) return;
1648 }
1649 }
1650
1651
1652 /// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a pointer
1653 /// derived from the alloca, we can often still split the alloca into elements.
1654 /// This is useful if we have a large alloca where one element is phi'd
1655 /// together somewhere: we can SRoA and promote all the other elements even if
1656 /// we end up not being able to promote this one.
1657 ///
1658 /// All we require is that the uses of the PHI do not index into other parts of
1659 /// the alloca. The most important use case for this is single loads and stores
1660 /// that are PHI'd together, which can happen due to code sinking.
1661 void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
1662 AllocaInfo &Info) {
1663 // If we've already checked this PHI, don't do it again.
1664 if (PHINode *PN = dyn_cast<PHINode>(I))
1665 if (!Info.CheckedPHIs.insert(PN).second)
1666 return;
1667
1668 const DataLayout &DL = I->getModule()->getDataLayout();
1669 for (User *U : I->users()) {
1670 Instruction *UI = cast(U);
1671
1672 if (BitCastInst *BC = dyn_cast<BitCastInst>(UI)) {
1673 isSafePHISelectUseForScalarRepl(BC, Offset, Info);
1674 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
1675 // Only allow "bitcast" GEPs for simplicity. We could generalize this,
1676 // but would have to prove that we're staying inside of an element being
1677 // promoted.
1678 if (!GEPI->hasAllZeroIndices())
1679 return MarkUnsafe(Info, UI);
1680 isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
1681 } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
1682 if (!LI->isSimple())
1683 return MarkUnsafe(Info, UI);
1684 Type *LIType = LI->getType();
1685 isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
1686 LI, false /*AllowWholeAccess*/);
1687 Info.hasALoadOrStore = true;
1688
1689 } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1690 // Store is ok if storing INTO the pointer, not storing the pointer
1691 if (!SI->isSimple() || SI->getOperand(0) == I)
1692 return MarkUnsafe(Info, UI);
1693
1694 Type *SIType = SI->getOperand(0)->getType();
1695 isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
1696 SI, false /*AllowWholeAccess*/);
1697 Info.hasALoadOrStore = true;
1698 } else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
1699 isSafePHISelectUseForScalarRepl(UI, Offset, Info);
1700 } else {
1701 return MarkUnsafe(Info, UI);
1702 }
1703 if (Info.isUnsafe) return;
1704 }
1705 }
1706
1707 /// isSafeGEP - Check if a GEP instruction can be handled for scalar
1708 /// replacement. It is safe when all the indices are constant, in-bounds
1709 /// references, and when the resulting offset corresponds to an element within
1710 /// the alloca type. The results are flagged in the Info parameter. Upon
1711 /// return, Offset is adjusted as specified by the GEP indices.
1712 void SROA::isSafeGEP(GetElementPtrInst *GEPI,
1713 uint64_t &Offset, AllocaInfo &Info) {
1714 gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
1715 if (GEPIt == E)
1716 return;
1717 bool NonConstant = false;
1718 unsigned NonConstantIdxSize = 0;
1719
1720 // Walk through the GEP type indices, checking the types that this indexes
1721 // into.
1722 for (; GEPIt != E; ++GEPIt) {
1723 // Ignore struct elements, no extra checking needed for these.
1724 if ((*GEPIt)->isStructTy())
1725 continue;
1726
1727 ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
1728 if (!IdxVal)
1729 return MarkUnsafe(Info, GEPI);
1730 }
1731
1732 // Compute the offset due to this GEP and check if the alloca has a
1733 // component element at that offset.
1734 SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
1735 // If this GEP is non-constant then the last operand must have been a
1736 // dynamic index into a vector. Pop this now as it has no impact on the
1737 // constant part of the offset.
1738 if (NonConstant)
1739 Indices.pop_back();
1740
1741 const DataLayout &DL = GEPI->getModule()->getDataLayout();
1742 Offset += DL.getIndexedOffsetInType(GEPI->getSourceElementType(), Indices);
1743 if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize,
1744 DL))
1745 MarkUnsafe(Info, GEPI);
1746 }
1747
1748 /// isHomogeneousAggregate - Check if type T is a struct or array containing
1749 /// elements of the same type (which is always true for arrays). If so,
1750 /// return true with NumElts and EltTy set to the number of elements and the
1751 /// element type, respectively.
1752 static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
1753 Type *&EltTy) {
1754 if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
1755 NumElts = AT->getNumElements();
1756 EltTy = (NumElts == 0 ? nullptr : AT->getElementType());
1757 return true;
1758 }
1759 if (StructType *ST = dyn_cast<StructType>(T)) {
1760 NumElts = ST->getNumContainedTypes();
1761 EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0));
1762 for (unsigned n = 1; n < NumElts; ++n) {
1763 if (ST->getContainedType(n) != EltTy)
1764 return false;
1765 }
1766 return true;
1767 }
1768 return false;
1769 }
1770
1771 /// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
1772 /// "homogeneous" aggregates with the same element type and number of elements.
1773 static bool isCompatibleAggregate(Type *T1, Type *T2) {
1774 if (T1 == T2)
1775 return true;
1776
1777 unsigned NumElts1, NumElts2;
1778 Type *EltTy1, *EltTy2;
1779 if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
1780 isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
1781 NumElts1 == NumElts2 &&
1782 EltTy1 == EltTy2)
1783 return true;
1784
1785 return false;
1786 }
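As an illustration (assumed example, not taken from the original source), these two types are "compatible" in the above sense, so a load or store of one through a pointer bitcast to the other can be rewritten element by element:

%pair  = type { i32, i32 }   ; struct with two i32 members
%pair2 = type [2 x i32]      ; homogeneous array: same element type and count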
1787
1788 /// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
1789 /// alloca or has an offset and size that corresponds to a component element
1790 /// within it. The offset checked here may have been formed from a GEP with a
1791 /// pointer bitcasted to a different type.
1792 ///
1793 /// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
1794 /// unit. If false, it only allows accesses known to be in a single element.
1795 void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
1796 Type *MemOpType, bool isStore,
1797 AllocaInfo &Info, Instruction *TheAccess,
1798 bool AllowWholeAccess) {
1799 const DataLayout &DL = TheAccess->getModule()->getDataLayout();
1800 // Check if this is a load/store of the entire alloca.
1801 if (Offset == 0 && AllowWholeAccess &&
1802 MemSize == DL.getTypeAllocSize(Info.AI->getAllocatedType())) {
1803 // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
1804 // loads/stores (which are essentially the same as the MemIntrinsics with
1805 // regard to copying padding between elements). But, if an alloca is
1806 // flagged as both a source and destination of such operations, we'll need
1807 // to check later for padding between elements.
1808 if (!MemOpType || MemOpType->isIntegerTy()) {
1809 if (isStore)
1810 Info.isMemCpyDst = true;
1811 else
1812 Info.isMemCpySrc = true;
1813 return;
1814 }
1815 // This is also safe for references using a type that is compatible with
1816 // the type of the alloca, so that loads/stores can be rewritten using
1817 // insertvalue/extractvalue.
1818 if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
1819 Info.hasSubelementAccess = true;
1820 return;
1821 }
1822 }
1823 // Check if the offset/size correspond to a component within the alloca type.
1824 Type *T = Info.AI->getAllocatedType();
1825 if (TypeHasComponent(T, Offset, MemSize, DL)) {
1826 Info.hasSubelementAccess = true;
1827 return;
1828 }
1829
1830 return MarkUnsafe(Info, TheAccess);
1831 }
1832
1833 /// TypeHasComponent - Return true if T has a component type with the
1834 /// specified offset and size. If Size is zero, do not check the size.
1835 bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
1836 const DataLayout &DL) {
1837 Type *EltTy;
1838 uint64_t EltSize;
1839 if (StructType *ST = dyn_cast<StructType>(T)) {
1840 const StructLayout *Layout = DL.getStructLayout(ST);
1841 unsigned EltIdx = Layout->getElementContainingOffset(Offset);
1842 EltTy = ST->getContainedType(EltIdx);
1843 EltSize = DL.getTypeAllocSize(EltTy);
1844 Offset -= Layout->getElementOffset(EltIdx);
1845 } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
1846 EltTy = AT->getElementType();
1847 EltSize = DL.getTypeAllocSize(EltTy);
1848 if (Offset >= AT->getNumElements() * EltSize)
1849 return false;
1850 Offset %= EltSize;
1851 } else if (VectorType *VT = dyn_cast<VectorType>(T)) {
1852 EltTy = VT->getElementType();
1853 EltSize = DL.getTypeAllocSize(EltTy);
1854 if (Offset >= VT->getNumElements() * EltSize)
1855 return false;
1856 Offset %= EltSize;
1857 } else {
1858 return false;
1859 }
1860 if (Offset == 0 && (Size == 0 || EltSize == Size))
1861 return true;
1862 // Check if the component spans multiple elements.
1863 if (Offset + Size > EltSize)
1864 return false;
1865 return TypeHasComponent(EltTy, Offset, Size, DL);
1866 }
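A hypothetical example (not from the original source) of the recursion above: for the type below, byte offset 6 with size 2 first resolves to struct element 1 at offset 2, then to the second i16 of the array at offset 0, so a matching component exists. The GEP that addresses that component is shown for reference:

%T = type { i32, [4 x i16] }

define i16* @component(%T* %p) {
  %q = getelementptr %T, %T* %p, i32 0, i32 1, i32 1
  ret i16* %q
}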
1867
1868 /// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
1869 /// the instruction I, which references it, to use the separate elements.
1870 /// Offset indicates the position within AI that is referenced by this
1871 /// instruction.
1872 void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
1873 SmallVectorImpl<AllocaInst *> &NewElts) {
1874 const DataLayout &DL = I->getModule()->getDataLayout();
1875 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
1876 Use &TheUse = *UI++;
1877 Instruction *User = cast<Instruction>(TheUse.getUser());
1878
1879 if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1880 RewriteBitCast(BC, AI, Offset, NewElts);
1881 continue;
1882 }
1883
1884 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1885 RewriteGEP(GEPI, AI, Offset, NewElts);
1886 continue;
1887 }
1888
1889 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
1890 ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
1891 uint64_t MemSize = Length->getZExtValue();
1892 if (Offset == 0 && MemSize == DL.getTypeAllocSize(AI->getAllocatedType()))
1893 RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
1894 // Otherwise the intrinsic can only touch a single element and the
1895 // address operand will be updated, so nothing else needs to be done.
1896 continue;
1897 }
1898
1899 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
1900 if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
1901 II->getIntrinsicID() == Intrinsic::lifetime_end) {
1902 RewriteLifetimeIntrinsic(II, AI, Offset, NewElts);
1903 }
1904 continue;
1905 }
1906
1907 if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1908 Type *LIType = LI->getType();
1909
1910 if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
1911 // Replace:
1912 // %res = load { i32, i32 }* %alloc
1913 // with:
1914 // %load.0 = load i32* %alloc.0
1915 // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
1916 // %load.1 = load i32* %alloc.1
1917 // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
1918 // (Also works for arrays instead of structs)
1919 Value *Insert = UndefValue::get(LIType);
1920 IRBuilder<> Builder(LI);
1921 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1922 Value *Load = Builder.CreateLoad(NewElts[i], "load");
1923 Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
1924 }
1925 LI->replaceAllUsesWith(Insert);
1926 DeadInsts.push_back(LI);
1927 } else if (LIType->isIntegerTy() &&
1928 DL.getTypeAllocSize(LIType) ==
1929 DL.getTypeAllocSize(AI->getAllocatedType())) {
1930 // If this is a load of the entire alloca to an integer, rewrite it.
1931 RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
1932 }
1933 continue;
1934 }
1935
1936 if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1937 Value *Val = SI->getOperand(0);
1938 Type *SIType = Val->getType();
1939 if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
1940 // Replace:
1941 // store { i32, i32 } %val, { i32, i32 }* %alloc
1942 // with:
1943 // %val.0 = extractvalue { i32, i32 } %val, 0
1944 // store i32 %val.0, i32* %alloc.0
1945 // %val.1 = extractvalue { i32, i32 } %val, 1
1946 // store i32 %val.1, i32* %alloc.1
1947 // (Also works for arrays instead of structs)
1948 IRBuilder<> Builder(SI);
1949 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1950 Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName());
1951 Builder.CreateStore(Extract, NewElts[i]);
1952 }
1953 DeadInsts.push_back(SI);
1954 } else if (SIType->isIntegerTy() &&
1955 DL.getTypeAllocSize(SIType) ==
1956 DL.getTypeAllocSize(AI->getAllocatedType())) {
1957 // If this is a store of the entire alloca from an integer, rewrite it.
1958 RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
1959 }
1960 continue;
1961 }
1962
1963 if (isa<SelectInst>(User) || isa<PHINode>(User)) {
1964 // If we have a PHI user of the alloca itself (as opposed to a GEP or
1965 // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
1966 // the new pointer.
1967 if (!isa<AllocaInst>(I)) continue;
1968
1969 assert(Offset == 0 && NewElts[0] &&
1970 "Direct alloca use should have a zero offset");
1971
1972 // If we have a use of the alloca, we know the derived uses will be
1973 // utilizing just the first element of the scalarized result. Insert a
1974 // bitcast of the first alloca before the user as required.
1975 AllocaInst *NewAI = NewElts[0];
1976 BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
1977 NewAI->moveBefore(BCI);
1978 TheUse = BCI;
1979 continue;
1980 }
1981 }
1982 }
1983
1984 /// RewriteBitCast - Update a bitcast reference to the alloca being replaced
1985 /// and recursively continue updating all of its uses.
1986 void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
1987 SmallVectorImpl<AllocaInst *> &NewElts) {
1988 RewriteForScalarRepl(BC, AI, Offset, NewElts);
1989 if (BC->getOperand(0) != AI)
1990 return;
1991
1992 // The bitcast references the original alloca. Replace its uses with
1993 // references to the alloca containing offset zero (which is normally at
1994 // index zero, but might not be in cases involving structs with elements
1995 // of size zero).
1996 Type *T = AI->getAllocatedType();
1997 uint64_t EltOffset = 0;
1998 Type *IdxTy;
1999 uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy,
2000 BC->getModule()->getDataLayout());
2001 Instruction *Val = NewElts[Idx];
2002 if (Val->getType() != BC->getDestTy()) {
2003 Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
2004 Val->takeName(BC);
2005 }
2006 BC->replaceAllUsesWith(Val);
2007 DeadInsts.push_back(BC);
2008 }
2009
2010 /// FindElementAndOffset - Return the index of the element containing Offset
2011 /// within the specified type, which must be either a struct or an array.
2012 /// Sets T to the type of the element and Offset to the offset within that
2013 /// element. IdxTy is set to the type of the index result to be used in a
2014 /// GEP instruction.
2015 uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
2016 const DataLayout &DL) {
2017 uint64_t Idx = 0;
2018
2019 if (StructType *ST = dyn_cast<StructType>(T)) {
2020 const StructLayout *Layout = DL.getStructLayout(ST);
2021 Idx = Layout->getElementContainingOffset(Offset);
2022 T = ST->getContainedType(Idx);
2023 Offset -= Layout->getElementOffset(Idx);
2024 IdxTy = Type::getInt32Ty(T->getContext());
2025 return Idx;
2026 } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
2027 T = AT->getElementType();
2028 uint64_t EltSize = DL.getTypeAllocSize(T);
2029 Idx = Offset / EltSize;
2030 Offset -= Idx * EltSize;
2031 IdxTy = Type::getInt64Ty(T->getContext());
2032 return Idx;
2033 }
2034 VectorType *VT = cast<VectorType>(T);
2035 T = VT->getElementType();
2036 uint64_t EltSize = DL.getTypeAllocSize(T);
2037 Idx = Offset / EltSize;
2038 Offset -= Idx * EltSize;
2039 IdxTy = Type::getInt64Ty(T->getContext());
2040 return Idx;
2041 }
2042
2043 /// RewriteGEP - Check if this GEP instruction moves the pointer across
2044 /// elements of the alloca that are being split apart, and if so, rewrite
2045 /// the GEP to be relative to the new element.
2046 void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
2047 SmallVectorImpl<AllocaInst *> &NewElts) {
2048 uint64_t OldOffset = Offset;
2049 const DataLayout &DL = GEPI->getModule()->getDataLayout();
2050 SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
2051 // If the GEP was dynamic then it must have been a dynamic vector lookup.
2052 // In this case, it must be the last GEP operand which is dynamic so keep that
2053 // aside until we've found the constant GEP offset then add it back in at the
2054 // end.
2055 Value* NonConstantIdx = nullptr;
2056 if (!GEPI->hasAllConstantIndices())
2057 NonConstantIdx = Indices.pop_back_val();
2058 Offset += DL.getIndexedOffsetInType(GEPI->getSourceElementType(), Indices);
2059
2060 RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
2061
2062 Type *T = AI->getAllocatedType();
2063 Type *IdxTy;
2064 uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy, DL);
2065 if (GEPI->getOperand(0) == AI)
2066 OldIdx = ~0ULL; // Force the GEP to be rewritten.
2067
2068 T = AI->getAllocatedType();
2069 uint64_t EltOffset = Offset;
2070 uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
2071
2072 // If this GEP does not move the pointer across elements of the alloca
2073 // being split, then it does not need to be rewritten.
2074 if (Idx == OldIdx)
2075 return;
2076
2077 Type *i32Ty = Type::getInt32Ty(AI->getContext());
2078 SmallVector NewArgs;
2079 NewArgs.push_back(Constant::getNullValue(i32Ty));
2080 while (EltOffset != 0) {
2081 uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
2082 NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
2083 }
2084 if (NonConstantIdx) {
2085 Type* GepTy = T;
2086 // This GEP has a dynamic index. We need to add "i32 0" to index through
2087 // any structs or arrays in the original type until we get to the vector
2088 // to index.
2089 while (!isa<VectorType>(GepTy)) {
2090 NewArgs.push_back(Constant::getNullValue(i32Ty));
2091 GepTy = cast<CompositeType>(GepTy)->getTypeAtIndex(0U);
2092 }
2093 NewArgs.push_back(NonConstantIdx);
2094 }
2095 Instruction *Val = NewElts[Idx];
2096 if (NewArgs.size() > 1) {
2097 Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
2098 Val->takeName(GEPI);
2099 }
2100 if (Val->getType() != GEPI->getType())
2101 Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
2102 GEPI->replaceAllUsesWith(Val);
2103 DeadInsts.push_back(GEPI);
2104 }
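For illustration (not from the original source), a sketch of the GEP rewrite, assuming the aggregate below has been split into element allocas %agg.0 (i32) and %agg.1 ([2 x i32]):

define i32* @gep_before() {
  %agg = alloca { i32, [2 x i32] }
  %p = getelementptr { i32, [2 x i32] }, { i32, [2 x i32] }* %agg, i32 0, i32 1, i32 1
  ret i32* %p
}

; The same address, re-expressed relative to the element alloca it lands in.
define i32* @gep_after() {
  %agg.0 = alloca i32
  %agg.1 = alloca [2 x i32]
  %p = getelementptr inbounds [2 x i32], [2 x i32]* %agg.1, i32 0, i64 1
  ret i32* %p
}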
2105
2106 /// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
2107 /// to mark the lifetime of the scalarized memory.
2108 void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
2109 uint64_t Offset,
2110 SmallVectorImpl<AllocaInst *> &NewElts) {
2111 ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
2112 // Put matching lifetime markers on everything from Offset up to
2113 // Offset+OldSize.
2114 Type *AIType = AI->getAllocatedType();
2115 const DataLayout &DL = II->getModule()->getDataLayout();
2116 uint64_t NewOffset = Offset;
2117 Type *IdxTy;
2118 uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy, DL);
2119
2120 IRBuilder<> Builder(II);
2121 uint64_t Size = OldSize->getLimitedValue();
2122
2123 if (NewOffset) {
2124 // Splice the first element and index 'NewOffset' bytes in. SROA will
2125 // split the alloca again later.
2126 unsigned AS = AI->getType()->getAddressSpace();
2127 Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
2128 V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset));
2129
2130 IdxTy = NewElts[Idx]->getAllocatedType();
2131 uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset;
2132 if (EltSize > Size) {
2133 EltSize = Size;
2134 Size = 0;
2135 } else {
2136 Size -= EltSize;
2137 }
2138 if (II->getIntrinsicID() == Intrinsic::lifetime_start)
2139 Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
2140 else
2141 Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
2142 ++Idx;
2143 }
2144
2145 for (; Idx != NewElts.size() && Size; ++Idx) {
2146 IdxTy = NewElts[Idx]->getAllocatedType();
2147 uint64_t EltSize = DL.getTypeAllocSize(IdxTy);
2148 if (EltSize > Size) {
2149 EltSize = Size;
2150 Size = 0;
2151 } else {
2152 Size -= EltSize;
2153 }
2154 if (II->getIntrinsicID() == Intrinsic::lifetime_start)
2155 Builder.CreateLifetimeStart(NewElts[Idx],
2156 Builder.getInt64(EltSize));
2157 else
2158 Builder.CreateLifetimeEnd(NewElts[Idx],
2159 Builder.getInt64(EltSize));
2160 }
2161 DeadInsts.push_back(II);
2162 }
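For illustration (assumed example, not from the original source), a lifetime.start that covered a whole { i32, i32 } alloca ends up as one marker per element alloca once the aggregate has been split:

declare void @llvm.lifetime.start(i64, i8* nocapture)

define void @lifetime_after() {
  %agg.0 = alloca i32
  %agg.1 = alloca i32
  %p0 = bitcast i32* %agg.0 to i8*
  call void @llvm.lifetime.start(i64 4, i8* %p0)
  %p1 = bitcast i32* %agg.1 to i8*
  call void @llvm.lifetime.start(i64 4, i8* %p1)
  ret void
}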
2163
2164 /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
2165 /// Rewrite it to copy or set the elements of the scalarized memory.
2166 void
2167 SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
2168 AllocaInst *AI,
2169 SmallVectorImpl<AllocaInst *> &NewElts) {
2170 // If this is a memcpy/memmove, construct the other pointer as the
2171 // appropriate type. The "Other" pointer is the pointer that goes to memory
2172 // that doesn't have anything to do with the alloca that we are promoting. For
2173 // memset, this Value* stays null.
2174 Value *OtherPtr = nullptr;
2175 unsigned MemAlignment = MI->getAlignment();
2176 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
2177 if (Inst == MTI->getRawDest())
2178 OtherPtr = MTI->getRawSource();
2179 else {
2180 assert(Inst == MTI->getRawSource());
2181 OtherPtr = MTI->getRawDest();
2182 }
2183 }
2184
2185 // If there is an other pointer, we want to convert it to the same pointer
2186 // type as AI has, so we can GEP through it safely.
2187 if (OtherPtr) {
2188 unsigned AddrSpace =
2189 cast<PointerType>(OtherPtr->getType())->getAddressSpace();
2190
2191 // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
2192 // optimization, but it's also required to detect the corner case where
2193 // both pointer operands are referencing the same memory, and where
2194 // OtherPtr may be a bitcast or GEP that is currently being rewritten. (This
2195 // function is only called for mem intrinsics that access the whole
2196 // aggregate, so non-zero GEPs are not an issue here.)
2197 OtherPtr = OtherPtr->stripPointerCasts();
2198
2199 // Copying the alloca to itself is a no-op: just delete it.
2200 if (OtherPtr == AI || OtherPtr == NewElts[0]) {
2201 // This code will run twice for a no-op memcpy -- once for each operand.
2202 // Put only one reference to MI on the DeadInsts list.
2203 for (SmallVectorImpl<Value *>::const_iterator I = DeadInsts.begin(),
2204 E = DeadInsts.end(); I != E; ++I)
2205 if (*I == MI) return;
2206 DeadInsts.push_back(MI);
2207 return;
2208 }
2209
2210 // If the pointer is not the right type, insert a bitcast to the right
2211 // type.
2212 Type *NewTy = PointerType::get(AI->getAllocatedType(), AddrSpace);
2213
2214 if (OtherPtr->getType() != NewTy)
2215 OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
2216 }
2217
2218 // Process each element of the aggregate.
2219 bool SROADest = MI->getRawDest() == Inst;
2220
2221 Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
2222 const DataLayout &DL = MI->getModule()->getDataLayout();
2223
2224 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2225 // If this is a memcpy/memmove, emit a GEP of the other element address.
2226 Value *OtherElt = nullptr;
2227 unsigned OtherEltAlign = MemAlignment;
2228
2229 if (OtherPtr) {
2230 Value *Idx[2] = { Zero,
2231 ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
2232 OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx,
2233 OtherPtr->getName()+"."+Twine(i),
2234 MI);
2235 uint64_t EltOffset;
2236 Type *OtherTy = AI->getAllocatedType();
2237 if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
2238 EltOffset = DL.getStructLayout(ST)->getElementOffset(i);
2239 } else {
2240 Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
2241 EltOffset = DL.getTypeAllocSize(EltTy) * i;
2242 }
2243
2244 // The alignment of the other pointer is the guaranteed alignment of the
2245 // element, which is affected by both the known alignment of the whole
2246 // mem intrinsic and the alignment of the element. If the alignment of
2247 // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
2248 // known alignment is just 4 bytes.
2249 OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
2250 }
2251
2252 AllocaInst *EltPtr = NewElts[i];
2253 Type *EltTy = EltPtr->getAllocatedType();
2254
2255 // If we got down to a scalar, insert a load or store as appropriate.
2256 if (EltTy->isSingleValueType()) {
2257 if (isa<MemTransferInst>(MI)) {
2258 if (SROADest) {
2259 // From Other to Alloca.
2260 Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
2261 new StoreInst(Elt, EltPtr, MI);
2262 } else {
2263 // From Alloca to Other.
2264 Value *Elt = new LoadInst(EltPtr, "tmp", MI);
2265 new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
2266 }
2267 continue;
2268 }
2269 assert(isa<MemSetInst>(MI));
2270
2271 // If the stored element is zero (common case), just store a null
2272 // constant.
2273 Constant *StoreVal;
2274 if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
2275 if (CI->isZero()) {
2276 StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
2277 } else {
2278 // If EltTy is a vector type, get the element type.
2279 Type *ValTy = EltTy->getScalarType();
2280
2281 // Construct an integer with the right value.
2282 unsigned EltSize = DL.getTypeSizeInBits(ValTy);
2283 APInt OneVal(EltSize, CI->getZExtValue());
2284 APInt TotalVal(OneVal);
2285 // Set each byte.
2286 for (unsigned i = 0; 8*i < EltSize; ++i) {
2287 TotalVal = TotalVal.shl(8);
2288 TotalVal |= OneVal;
2289 }
2290
2291 // Convert the integer value to the appropriate type.
2292 StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
2293 if (ValTy->isPointerTy())
2294 StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
2295 else if (ValTy->isFloatingPointTy())
2296 StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
2297 assert(StoreVal->getType() == ValTy && "Type mismatch!");
2298
2299 // If the requested value was a vector constant, create it.
2300 if (EltTy->isVectorTy()) {
2301 unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
2302 StoreVal = ConstantVector::getSplat(NumElts, StoreVal);
2303 }
2304 }
2305 new StoreInst(StoreVal, EltPtr, MI);
2306 continue;
2307 }
2308 // Otherwise, if we're storing a byte variable, use a memset call for
2309 // this element.
2310 }
2311
2312 unsigned EltSize = DL.getTypeAllocSize(EltTy);
2313 if (!EltSize)
2314 continue;
2315
2316 IRBuilder<> Builder(MI);
2317
2318 // Finally, insert the meminst for this element.
2319 if (isa<MemSetInst>(MI)) {
2320 Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
2321 MI->isVolatile());
2322 } else {
2323 assert(isa<MemTransferInst>(MI));
2324 Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
2325 Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
2326
2327 if (isa<MemCpyInst>(MI))
2328 Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
2329 else
2330 Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
2331 }
2332 }
2333 DeadInsts.push_back(MI);
2334 }
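For illustration (assumed example, not from the original source): a whole-aggregate memset over a split { i32, i32 } alloca becomes one store per element, with the byte value replicated across each element as computed above (byte 0xAB becomes 0xABABABAB for an i32):

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)

define void @memset_before() {
  %agg = alloca { i32, i32 }
  %p = bitcast { i32, i32 }* %agg to i8*
  call void @llvm.memset.p0i8.i64(i8* %p, i8 171, i64 8, i32 4, i1 false)
  ret void
}

define void @memset_after() {
  %agg.0 = alloca i32
  %agg.1 = alloca i32
  store i32 -1414812757, i32* %agg.0   ; 0xABABABAB
  store i32 -1414812757, i32* %agg.1
  ret void
}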
2335
2336 /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
2337 /// overwrites the entire allocation. Extract out the pieces of the stored
2338 /// integer and store them individually.
2339 void
2340 SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
2341 SmallVectorImpl<AllocaInst *> &NewElts) {
2342 // Extract each element out of the integer according to its structure offset
2343 // and store the element value to the individual alloca.
2344 Value *SrcVal = SI->getOperand(0);
2345 Type *AllocaEltTy = AI->getAllocatedType();
2346 const DataLayout &DL = SI->getModule()->getDataLayout();
2347 uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
2348
2349 IRBuilder<> Builder(SI);
2350
2351 // Handle tail padding by extending the operand
2352 if (DL.getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
2353 SrcVal = Builder.CreateZExt(SrcVal,
2354 IntegerType::get(SI->getContext(), AllocaSizeBits));
2355
2356 DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
2357 << '\n');
2358
2359 // There are two forms here: AI could be an array or struct. Both cases
2360 // have different ways to compute the element offset.
2361 if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
2362 const StructLayout *Layout = DL.getStructLayout(EltSTy);
2363
2364 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2365 // Get the number of bits to shift SrcVal to get the value.
2366 Type *FieldTy = EltSTy->getElementType(i);
2367 uint64_t Shift = Layout->getElementOffsetInBits(i);
2368
2369 if (DL.isBigEndian())
2370 Shift = AllocaSizeBits - Shift - DL.getTypeAllocSizeInBits(FieldTy);
2371
2372 Value *EltVal = SrcVal;
2373 if (Shift) {
2374 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
2375 EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
2376 }
2377
2378 // Truncate down to an integer of the right size.
2379 uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
2380
2381 // Ignore zero sized fields like {}, they obviously contain no data.
2382 if (FieldSizeBits == 0) continue;
2383
2384 if (FieldSizeBits != AllocaSizeBits)
2385 EltVal = Builder.CreateTrunc(EltVal,
2386 IntegerType::get(SI->getContext(), FieldSizeBits));
2387 Value *DestField = NewElts[i];
2388 if (EltVal->getType() == FieldTy) {
2389 // Storing to an integer field of this size, just do it.
2390 } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
2391 // Bitcast to the right element type (for fp/vector values).
2392 EltVal = Builder.CreateBitCast(EltVal, FieldTy);
2393 } else {
2394 // Otherwise, bitcast the dest pointer (for aggregates).
2395 DestField = Builder.CreateBitCast(DestField,
2396 PointerType::getUnqual(EltVal->getType()));
2397 }
2398 new StoreInst(EltVal, DestField, SI);
2399 }
2400
2401 } else {
2402 ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
2403 Type *ArrayEltTy = ATy->getElementType();
2404 uint64_t ElementOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
2405 uint64_t ElementSizeBits = DL.getTypeSizeInBits(ArrayEltTy);
2406
2407 uint64_t Shift;
2408
2409 if (DL.isBigEndian())
2410 Shift = AllocaSizeBits-ElementOffset;
2411 else
2412 Shift = 0;
2413
2414 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2415 // Ignore zero sized fields like {}, they obviously contain no data.
2416 if (ElementSizeBits == 0) continue;
2417
2418 Value *EltVal = SrcVal;
2419 if (Shift) {
2420 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
2421 EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
2422 }
2423
2424 // Truncate down to an integer of the right size.
2425 if (ElementSizeBits != AllocaSizeBits)
2426 EltVal = Builder.CreateTrunc(EltVal,
2427 IntegerType::get(SI->getContext(),
2428 ElementSizeBits));
2429 Value *DestField = NewElts[i];
2430 if (EltVal->getType() == ArrayEltTy) {
2431 // Storing to an integer field of this size, just do it.
2432 } else if (ArrayEltTy->isFloatingPointTy() ||
2433 ArrayEltTy->isVectorTy()) {
2434 // Bitcast to the right element type (for fp/vector values).
2435 EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
2436 } else {
2437 // Otherwise, bitcast the dest pointer (for aggregates).
2438 DestField = Builder.CreateBitCast(DestField,
2439 PointerType::getUnqual(EltVal->getType()));
2440 }
2441 new StoreInst(EltVal, DestField, SI);
2442
2443 if (DL.isBigEndian())
2444 Shift -= ElementOffset;
2445 else
2446 Shift += ElementOffset;
2447 }
2448 }
2449
2450 DeadInsts.push_back(SI);
2451 }
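A hypothetical little-endian sketch (not from the original source) of the store rewrite for an i64 that covers a { i32, i32 } alloca already split into %agg.0 and %agg.1: each field is produced by shifting and truncating the stored integer.

define void @store_whole_after(i64 %v) {
  %agg.0 = alloca i32
  %agg.1 = alloca i32
  %lo = trunc i64 %v to i32
  store i32 %lo, i32* %agg.0
  %shifted = lshr i64 %v, 32
  %hi = trunc i64 %shifted to i32
  store i32 %hi, i32* %agg.1
  ret void
}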
2452
2453 /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
2454 /// an integer. Load the individual pieces to form the aggregate value.
2455 void
2456 SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
2457 SmallVectorImpl<AllocaInst *> &NewElts) {
2458 // Extract each element out of the NewElts according to its structure offset
2459 // and form the result value.
2460 Type *AllocaEltTy = AI->getAllocatedType();
2461 const DataLayout &DL = LI->getModule()->getDataLayout();
2462 uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
2463
2464 DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
2465 << '\n');
2466
2467 // There are two forms here: AI could be an array or struct. Both cases
2468 // have different ways to compute the element offset.
2469 const StructLayout *Layout = nullptr;
2470 uint64_t ArrayEltBitOffset = 0;
2471 if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
2472 Layout = DL.getStructLayout(EltSTy);
2473 } else {
2474 Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
2475 ArrayEltBitOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
2476 }
2477
2478 Value *ResultVal =
2479 Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
2480
2481 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2482 // Load the value from the alloca. If the NewElt is an aggregate, cast
2483 // the pointer to an integer of the same size before doing the load.
2484 Value *SrcField = NewElts[i];
2485 Type *FieldTy = NewElts[i]->getAllocatedType();
2486 uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
2487
2488 // Ignore zero sized fields like {}, they obviously contain no data.
2489 if (FieldSizeBits == 0) continue;
2490
2491 IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
2492 FieldSizeBits);
2493 if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
2494 !FieldTy->isVectorTy())
2495 SrcField = new BitCastInst(SrcField,
2496 PointerType::getUnqual(FieldIntTy),
2497 "", LI);
2498 SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
2499
2500 // If SrcField is a fp or vector of the right size but that isn't an
2501 // integer type, bitcast to an integer so we can shift it.
2502 if (SrcField->getType() != FieldIntTy)
2503 SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
2504
2505 // Zero extend the field to be the same size as the final alloca so that
2506 // we can shift and insert it.
2507 if (SrcField->getType() != ResultVal->getType())
2508 SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
2509
2510 // Determine the number of bits to shift SrcField.
2511 uint64_t Shift;
2512 if (Layout) // Struct case.
2513 Shift = Layout->getElementOffsetInBits(i);
2514 else // Array case.
2515 Shift = i*ArrayEltBitOffset;
2516
2517 if (DL.isBigEndian())
2518 Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
2519
2520 if (Shift) {
2521 Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
2522 SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
2523 }
2524
2525 // Don't create an 'or x, 0' on the first iteration.
2526 if (!isa<Constant>(ResultVal) ||
2527 !cast<Constant>(ResultVal)->isNullValue())
2528 ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
2529 else
2530 ResultVal = SrcField;
2531 }
2532
2533 // Handle tail padding by truncating the result
2534 if (DL.getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
2535 ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
2536
2537 LI->replaceAllUsesWith(ResultVal);
2538 DeadInsts.push_back(LI);
2539 }
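Correspondingly, a hypothetical little-endian sketch (not from the original source) of the load rewrite: each element is loaded, zero-extended, shifted to its bit offset, and or'd into the result.

define i64 @load_whole_after() {
  %agg.0 = alloca i32
  %agg.1 = alloca i32
  %lo = load i32, i32* %agg.0
  %lo.ext = zext i32 %lo to i64
  %hi = load i32, i32* %agg.1
  %hi.ext = zext i32 %hi to i64
  %hi.shl = shl i64 %hi.ext, 32
  %res = or i64 %hi.shl, %lo.ext
  ret i64 %res
}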
2540
2541 /// HasPadding - Return true if the specified type has any structure or
2542 /// alignment padding in between the elements that would be split apart
2543 /// by SROA; return false otherwise.
2544 static bool HasPadding(Type *Ty, const DataLayout &DL) {
2545 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2546 Ty = ATy->getElementType();
2547 return DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty);
2548 }
2549
2550 // SROA currently handles only Arrays and Structs.
2551 StructType *STy = cast<StructType>(Ty);
2552 const StructLayout *SL = DL.getStructLayout(STy);
2553 unsigned PrevFieldBitOffset = 0;
2554 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
2555 unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
2556
2557 // Check to see if there is any padding between this element and the
2558 // previous one.
2559 if (i) {
2560 unsigned PrevFieldEnd =
2561 PrevFieldBitOffset+DL.getTypeSizeInBits(STy->getElementType(i-1));
2562 if (PrevFieldEnd < FieldBitOffset)
2563 return true;
2564 }
2565 PrevFieldBitOffset = FieldBitOffset;
2566 }
2567 // Check for tail padding.
2568 if (unsigned EltCount = STy->getNumElements()) {
2569 unsigned PrevFieldEnd = PrevFieldBitOffset +
2570 DL.getTypeSizeInBits(STy->getElementType(EltCount-1));
2571 if (PrevFieldEnd < SL->getSizeInBits())
2572 return true;
2573 }
2574 return false;
2575 }
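For example (illustrative, not from the original source), with a typical 32-bit data layout the first type below has three bytes of padding after its first field and three bytes of tail padding, while the second has none; an alloca of the first type that is both a memcpy source and destination is therefore not split.

%padded   = type { i8, i32, i8 }   ; i8 at 0, padding, i32 at 4, i8 at 8, tail padding
%unpadded = type { i32, i32 }      ; contiguous, no padding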
2576
2577 /// isSafeAllocaToScalarRepl - Check to see if the specified allocation of
2578 /// an aggregate can be broken down into elements. Returns true if it is safe
2579 /// to do so, false otherwise.
2580 bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
2581 // Loop over the use list of the alloca. We can only transform it if all of
2582 // the users are safe to transform.
2583 AllocaInfo Info(AI);
2584
2585 isSafeForScalarRepl(AI, 0, Info);
2586 if (Info.isUnsafe) {
2587 DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
2588 return false;
2589 }
2590
2591 const DataLayout &DL = AI->getModule()->getDataLayout();
2592
2593 // Okay, we know all the users are promotable. If the aggregate is a memcpy
2594 // source and destination, we have to be careful. In particular, the memcpy
2595 // could be moving around elements that live in structure padding of the LLVM
2596 // types, but may actually be used. In these cases, we refuse to promote the
2597 // struct.
2598 if (Info.isMemCpySrc && Info.isMemCpyDst &&
2599 HasPadding(AI->getAllocatedType(), DL))
2600 return false;
2601
2602 // If the alloca never has an access to just *part* of it, but is accessed
2603 // via loads and stores, then we should use ConvertToScalarInfo to promote
2604 // the alloca instead of promoting each piece at a time and inserting fission
2605 // and fusion code.
2606 if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
2607 // If the struct/array just has one element, use basic SRoA.
2608 if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
2609 if (ST->getNumElements() > 1) return false;
2610 } else {
2611 if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
2612 return false;
2613 }
2614 }
2615
2616 return true;
2617 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: opt < %s -scalarrepl -instcombine | \
1 ; RUN: opt < %s -sroa -instcombine | \
22 ; RUN: llc -march=x86 -mcpu=yonah | not grep sub.*esp
33
44 ; This checks that various insert/extract idiom work without going to the
None ; RUN: opt %s -argpromotion -scalarrepl -S | FileCheck %s
0 ; RUN: opt %s -argpromotion -sroa -S | FileCheck %s
11
22 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
33
44 %struct.ss = type { i32, i32 }
55
6 ; Argpromote + scalarrepl should change this to passing the two integers by value.
6 ; Argpromote + sroa should change this to passing the two integers by value.
77 define internal i32 @f(%struct.ss* inalloca %s) {
88 entry:
99 %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0
None ; RUN: opt < %s -inline -scalarrepl -S | FileCheck %s
0 ; RUN: opt < %s -inline -sroa -S | FileCheck %s
11 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
22
33 define i32 @test1f(i32 %i) {
None ; RUN: opt -inline -scalarrepl -max-cg-scc-iterations=1 -disable-output < %s
0 ; RUN: opt -inline -sroa -max-cg-scc-iterations=1 -disable-output < %s
11 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
22 target triple = "x86_64-apple-darwin10.3"
33
None ; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine < %s | FileCheck %s
0 ; RUN: opt -basicaa -inline -S -sroa -gvn -instcombine < %s | FileCheck %s
11 ; PR5009
22
33 ; CHECK: define i32 @main()
None ; RUN: opt < %s -instcombine -scalarrepl -S | not grep " = alloca"
0 ; RUN: opt < %s -instcombine -sroa -S | not grep " = alloca"
11 ; rdar://6417724
22 ; Instcombine shouldn't do anything to this function that prevents promoting the allocas inside it.
33
None ; RUN: opt < %s -scalarrepl -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info
0 ; RUN: opt < %s -sroa -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info
11
22 define void @inflate() {
33 entry:
None ; RUN: opt < %s -scalarrepl-ssa -loop-unswitch -disable-output
0 ; RUN: opt < %s -sroa -loop-unswitch -disable-output
11 ; PR11016
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
33 target triple = "x86_64-apple-macosx10.7.2"
+0
-13
test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
None ; RUN: opt < %s -scalarrepl -instcombine -S | not grep alloca
1 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
2
3 ; Test that an array is not incorrectly deconstructed.
4
5 define i32 @test() nounwind {
6 %X = alloca [4 x i32] ; <[4 x i32]*> [#uses=1]
7 %Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 0 ; [#uses=1]
8 ; Must preserve arrayness!
9 %Z = getelementptr i32, i32* %Y, i64 1 ; [#uses=1]
10 %A = load i32, i32* %Z ; [#uses=1]
11 ret i32 %A
12 }
+0
-12
test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
None ; Scalar replacement was incorrectly promoting this alloca!!
1 ;
2 ; RUN: opt < %s -scalarrepl -S | FileCheck %s
3
4 define i8* @test() {
5 %A = alloca [30 x i8] ; <[30 x i8]*> [#uses=1]
6 %B = getelementptr [30 x i8], [30 x i8]* %A, i64 0, i64 0 ; [#uses=2]
7 %C = getelementptr i8, i8* %B, i64 1 ; [#uses=1]
8 store i8 0, i8* %B
9 ret i8* %C
10 }
11 ; CHECK: alloca [
+0
-16
test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
None ; RUN: opt < %s -scalarrepl -S | grep "alloca %%T"
1
2 %T = type { [80 x i8], i32, i32 }
3 declare i32 @.callback_1(i8*)
4
5 declare void @.iter_2(i32 (i8*)*, i8*)
6
7 define i32 @main() {
8 %d = alloca %T ; <{ [80 x i8], i32, i32 }*> [#uses=2]
9 %tmp.0 = getelementptr %T, %T* %d, i64 0, i32 2 ; [#uses=1]
10 store i32 0, i32* %tmp.0
11 %tmp.1 = getelementptr %T, %T* %d, i64 0, i32 0, i64 0 ; [#uses=1]
12 call void @.iter_2( i32 (i8*)* @.callback_1, i8* %tmp.1 )
13 ret i32 0
14 }
15
+0
-20
test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
None ; RUN: opt < %s -scalarrepl -S | not grep alloca
1 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
2
3 define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
4 %vsiidx = alloca [2 x <4 x i32>], align 16 ; <[2 x <4 x i32>]*> [#uses=3]
5 %tmp = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v0 ) ; <<4 x i32>> [#uses=2]
6 %tmp.upgrd.1 = bitcast <4 x i32> %tmp to <2 x i64> ; <<2 x i64>> [#uses=0]
7 %tmp.upgrd.2 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
8 store <4 x i32> %tmp, <4 x i32>* %tmp.upgrd.2
9 %tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v1 ) ; <<4 x i32>> [#uses=2]
10 %tmp10.upgrd.3 = bitcast <4 x i32> %tmp10 to <2 x i64> ; <<2 x i64>> [#uses=0]
11 %tmp14 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 1 ; <<4 x i32>*> [#uses=1]
12 store <4 x i32> %tmp10, <4 x i32>* %tmp14
13 %tmp15 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 0, i32 4 ; [#uses=1]
14 %tmp.upgrd.4 = load i32, i32* %tmp15 ; [#uses=1]
15 ret i32 %tmp.upgrd.4
16 }
17
18 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
19
+0
-24
test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
None ; RUN: opt < %s -scalarrepl -S | grep memcpy
1 ; PR1421
2
3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
4 target triple = "i686-apple-darwin8"
5
6 %struct.LongestMember = type { i8, i32 }
7 %struct.MyString = type { i32 }
8 %struct.UnionType = type { %struct.LongestMember }
9
10 define void @_Z4testP9UnionTypePS0_(%struct.UnionType* %p, %struct.UnionType** %pointerToUnion) {
11 entry:
12 %tmp = alloca %struct.UnionType, align 8
13 %tmp2 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
14 %tmp13 = getelementptr %struct.UnionType, %struct.UnionType* %p, i32 0, i32 0, i32 0
15 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false)
16 %tmp5 = load %struct.UnionType*, %struct.UnionType** %pointerToUnion
17 %tmp56 = getelementptr %struct.UnionType, %struct.UnionType* %tmp5, i32 0, i32 0, i32 0
18 %tmp7 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
19 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false)
20 ret void
21 }
22
23 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+0
-36
test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
None ; RUN: opt < %s -scalarrepl -S | not grep shr
1
2 ; FIXME: I think this test is no longer valid.
3 ; It was working because SROA was aborting when
4 ; no datalayout was supplied
5 ; XFAIL: *
6
7
8 %struct.S = type { i16 }
9
10 define zeroext i1 @f(i16 signext %b) {
11 entry:
12 %b_addr = alloca i16 ; [#uses=2]
13 %retval = alloca i32 ; [#uses=2]
14 %s = alloca %struct.S ; <%struct.S*> [#uses=2]
15 %tmp = alloca i32 ; [#uses=2]
16 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
17 store i16 %b, i16* %b_addr
18 %tmp1 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0 ; [#uses=1]
19 %tmp2 = load i16, i16* %b_addr, align 2 ; [#uses=1]
20 store i16 %tmp2, i16* %tmp1, align 2
21 %tmp3 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0 ; [#uses=1]
22 %tmp34 = bitcast i16* %tmp3 to [2 x i1]* ; <[2 x i1]*> [#uses=1]
23 %tmp5 = getelementptr [2 x i1], [2 x i1]* %tmp34, i32 0, i32 1 ; [#uses=1]
24 %tmp6 = load i1, i1* %tmp5, align 1 ; [#uses=1]
25 %tmp67 = zext i1 %tmp6 to i32 ; [#uses=1]
26 store i32 %tmp67, i32* %tmp, align 4
27 %tmp8 = load i32, i32* %tmp, align 4 ; [#uses=1]
28 store i32 %tmp8, i32* %retval, align 4
29 br label %return
30
31 return: ; preds = %entry
32 %retval9 = load i32, i32* %retval ; [#uses=1]
33 %retval910 = trunc i32 %retval9 to i1 ; [#uses=1]
34 ret i1 %retval910
35 }
+0
-21
test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
None ; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i8 17"
1 ; rdar://5707076
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
3 target triple = "i386-apple-darwin9.1.0"
4 %struct.T = type <{ i8, [3 x i8] }>
5
6 define i8 @f() {
7 entry:
8 %s = alloca [1 x %struct.T], align 4 ; <[1 x %struct.T]*> [#uses=2]
9 %T3 = bitcast [1 x %struct.T]* %s to i32*
10 store i32 -61184, i32* %T3
11
12 %tmp16 = getelementptr [1 x %struct.T], [1 x %struct.T]* %s, i32 0, i32 0 ; <%struct.T*> [#uses=1]
13 %tmp17 = getelementptr %struct.T, %struct.T* %tmp16, i32 0, i32 1 ; <[3 x i8]*> [#uses=1]
14 %tmp1718 = bitcast [3 x i8]* %tmp17 to i32* ; [#uses=1]
15 %tmp19 = load i32, i32* %tmp1718, align 4 ; [#uses=1]
16 %mask = and i32 %tmp19, 16777215 ; [#uses=2]
17 %mask2324 = trunc i32 %mask to i8 ; [#uses=1]
18 ret i8 %mask2324
19 }
20
+0
-16
test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
None ; RUN: opt < %s -scalarrepl -S | not grep alloca
1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
2 target triple = "i686-apple-darwin8"
3 %struct..0anon = type { <1 x i64> }
4
5 define i32 @main(i32 %argc, i8** %argv) {
6 entry:
7 %c = alloca %struct..0anon ; <%struct..0anon*> [#uses=2]
8 %tmp2 = getelementptr %struct..0anon, %struct..0anon* %c, i32 0, i32 0 ; <<1 x i64>*> [#uses=1]
9 store <1 x i64> zeroinitializer, <1 x i64>* %tmp2, align 8
10 %tmp7 = getelementptr %struct..0anon, %struct..0anon* %c, i32 0, i32 0 ; <<1 x i64>*> [#uses=1]
11 %tmp78 = bitcast <1 x i64>* %tmp7 to [2 x i32]* ; <[2 x i32]*> [#uses=1]
12 %tmp9 = getelementptr [2 x i32], [2 x i32]* %tmp78, i32 0, i32 0 ; [#uses=1]
13 %tmp10 = load i32, i32* %tmp9, align 4 ; [#uses=0]
14 unreachable
15 }
+0
-33
test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
None ; This test shows an alloca of a struct and an array that can be reduced to
1 ; multiple variables easily. However, the alloca is used by a store
2 ; instruction, which was not possible before aggregates were first class
3 ; values. This checks if scalarrepl splits up the struct and array properly.
4
5 ; RUN: opt < %s -scalarrepl -S | not grep alloca
6 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
7
8 define i32 @foo() {
9 %target = alloca { i32, i32 } ; <{ i32, i32 }*> [#uses=1]
10 ; Build a first class struct to store
11 %res1 = insertvalue { i32, i32 } undef, i32 1, 0 ; <{ i32, i32 }> [#uses=1]
12 %res2 = insertvalue { i32, i32 } %res1, i32 2, 1 ; <{ i32, i32 }> [#uses=1]
13 ; And store it
14 store { i32, i32 } %res2, { i32, i32 }* %target
15 ; Actually use %target, so it doesn't get removed altogether
16 %ptr = getelementptr { i32, i32 }, { i32, i32 }* %target, i32 0, i32 0
17 %val = load i32, i32* %ptr
18 ret i32 %val
19 }
20
21 define i32 @bar() {
22 %target = alloca [ 2 x i32 ] ; <{ i32, i32 }*> [#uses=1]
23 ; Build a first class array to store
24 %res1 = insertvalue [ 2 x i32 ] undef, i32 1, 0 ; <{ i32, i32 }> [#uses=1]
25 %res2 = insertvalue [ 2 x i32 ] %res1, i32 2, 1 ; <{ i32, i32 }> [#uses=1]
26 ; And store it
27 store [ 2 x i32 ] %res2, [ 2 x i32 ]* %target
28 ; Actually use %target, so it doesn't get removed altogether
29 %ptr = getelementptr [ 2 x i32 ], [ 2 x i32 ]* %target, i32 0, i32 0
30 %val = load i32, i32* %ptr
31 ret i32 %val
32 }
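For @foo above the RUN line only requires that no alloca survives; one plausible shape for the scalarized result, with the first-class store folded into the load via extractvalue, is sketched here (illustrative only, not the pass's verbatim output):

define i32 @foo() {
  ; The { i32, i32 } temporary is gone; the loaded field is read straight
  ; out of the stored aggregate value.
  %res1 = insertvalue { i32, i32 } undef, i32 1, 0
  %res2 = insertvalue { i32, i32 } %res1, i32 2, 1
  %val = extractvalue { i32, i32 } %res2, 0
  ret i32 %val
}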
+0
-17
test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
None ; RUN: opt < %s -scalarrepl -S | grep "call.*mem"
1 ; PR2369
2
3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
4 target triple = "i386-apple-darwin8"
5
6 define void @memtest1(i8* %dst, i8* %src) nounwind {
7 entry:
8 %temp = alloca [200 x i8]
9 %temp1 = bitcast [200 x i8]* %temp to i8*
10 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i32 1, i1 false)
11 %temp3 = bitcast [200 x i8]* %temp to i8*
12 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i32 1, i1 false)
13 ret void
14 }
15
16 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+0
-23
test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
None ; RUN: opt < %s -scalarrepl -S | grep "s = alloca .struct.x"
1 ; PR2423
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
3 target triple = "i386-apple-darwin8"
4
5 %struct.x = type { [1 x i32], i32, i32 }
6
7 define i32 @b() nounwind {
8 entry:
9 %s = alloca %struct.x
10 %r = alloca %struct.x
11 %0 = call i32 @a(%struct.x* %s) nounwind
12 %r1 = bitcast %struct.x* %r to i8*
13 %s2 = bitcast %struct.x* %s to i8*
14 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false)
15 %1 = getelementptr %struct.x, %struct.x* %r, i32 0, i32 0, i32 1
16 %2 = load i32, i32* %1, align 4
17 ret i32 %2
18 }
19
20 declare i32 @a(%struct.x*)
21
22 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+0
-25
test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
None ; This test checks to see if scalarrepl also works when a gep with all zeroes is
1 ; used instead of a bitcast to prepare a memmove pointer argument. Previously,
2 ; this would not work when there was a vector involved in the struct, preventing
3 ; scalarrepl from removing the alloca below.
4
5 ; RUN: opt < %s -scalarrepl -S > %t
6 ; RUN: cat %t | not grep alloca
7 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
8
9 %struct.two = type <{ < 2 x i8 >, i16 }>
10
11 define void @main(%struct.two* %D, i16 %V) {
12 entry:
13 %S = alloca %struct.two
14 %S.2 = getelementptr %struct.two, %struct.two* %S, i32 0, i32 1
15 store i16 %V, i16* %S.2
16 ; This gep is effectively a bitcast to i8*, but is sometimes generated
17 ; because the type of the first element in %struct.two is i8.
18 %tmpS = getelementptr %struct.two, %struct.two* %S, i32 0, i32 0, i32 0
19 %tmpD = bitcast %struct.two* %D to i8*
20 call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1, i1 false)
21 ret void
22 }
23
24 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+0
-16
test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
None ; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i32 %x"
1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
2 target triple = "i386-pc-linux-gnu"
3
4 %pair = type { [1 x i32], i32 }
5
6 define i32 @f(i32 %x, i32 %y) {
7 %instance = alloca %pair
8 %first = getelementptr %pair, %pair* %instance, i32 0, i32 0
9 %cast = bitcast [1 x i32]* %first to i32*
10 store i32 %x, i32* %cast
11 %second = getelementptr %pair, %pair* %instance, i32 0, i32 1
12 store i32 %y, i32* %second
13 %v = load i32, i32* %cast
14 ret i32 %v
15 }
+0
-20
test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
None ; RUN: opt < %s -scalarrepl -instcombine -inline -instcombine -S | grep "ret i32 42"
1 ; PR3489
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
3 target triple = "x86_64-apple-darwin10.0"
4 %struct.anon = type <{ i32, i32, i32 }>
5
6 define i32 @f({ i64, i64 }) nounwind {
7 entry:
8 %tmp = alloca { i64, i64 }, align 8 ; <{ i64, i64 }*> [#uses=2]
9 store { i64, i64 } %0, { i64, i64 }* %tmp
10 %1 = bitcast { i64, i64 }* %tmp to %struct.anon* ; <%struct.anon*> [#uses=1]
11 %2 = load %struct.anon, %struct.anon* %1, align 8 ; <%struct.anon> [#uses=1]
12 %tmp3 = extractvalue %struct.anon %2, 0
13 ret i32 %tmp3
14 }
15
16 define i32 @g() {
17 %a = call i32 @f({i64,i64} { i64 42, i64 1123123123123123 })
18 ret i32 %a
19 }
+0
-19
test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
None ; The store into %p should end up with a known alignment of 1, since the memcpy
1 ; is only known to access it with 1-byte alignment.
2 ; RUN: opt < %s -scalarrepl -S | grep "store i16 1, .*, align 1"
3 ; PR3720
4 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
5
6 %struct.st = type { i16 }
7
8 define void @f(i8* %p) nounwind {
9 entry:
10 %s = alloca %struct.st, align 4 ; <%struct.st*> [#uses=2]
11 %0 = getelementptr %struct.st, %struct.st* %s, i32 0, i32 0 ; [#uses=1]
12 store i16 1, i16* %0, align 4
13 %s1 = bitcast %struct.st* %s to i8* ; [#uses=1]
14 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %s1, i32 2, i32 1, i1 false)
15 ret void
16 }
17
18 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
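The grep in the RUN line above looks for a rewrite of roughly this shape: the on-stack temporary and its memcpy collapse into a direct store through %p, and the store's alignment is capped at the 1-byte alignment the memcpy guarantees (value names are illustrative, not verbatim pass output):

define void @f(i8* %p) nounwind {
entry:
  %p.cast = bitcast i8* %p to i16*
  ; Alignment 1, because the only thing known about %p comes from the
  ; 1-byte-aligned memcpy in the original function.
  store i16 1, i16* %p.cast, align 1
  ret void
}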
+0
-90
test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
None ; RUN: opt < %s -scalarrepl -S | FileCheck %s
1 ; Radar 7441282
2
3 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
4 target triple = "thumbv7-apple-darwin10"
5
6 %struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
7 %struct.int16x8_t = type { <8 x i16> }
8 %struct.int16x8x2_t = type { [2 x %struct.int16x8_t] }
9 %union..0anon = type { %struct.int16x8x2_t }
10
11 define void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind {
12 ; CHECK-LABEL: @test(
13 ; CHECK-NOT: alloca
14 ; CHECK: "alloca point"
15 ; CHECK: store <8 x i16>
16 ; CHECK: store <8 x i16>
17
18 entry:
19 %tmp_addr = alloca %struct.int16x8_t
20 %dst_addr = alloca %struct.int16x8x2_t*
21 %__rv = alloca %union..0anon
22 %__bx = alloca %struct.int16x8_t
23 %__ax = alloca %struct.int16x8_t
24 %tmp2 = alloca %struct.int16x8x2_t
25 %0 = alloca %struct.int16x8x2_t
26 %"alloca point" = bitcast i32 0 to i32
27 %1 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
28 store <8 x i16> %tmp.0, <8 x i16>* %1
29 store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr
30 %2 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0
31 %3 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
32 %4 = load <8 x i16>, <8 x i16>* %3, align 16
33 store <8 x i16> %4, <8 x i16>* %2, align 16
34 %5 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0
35 %6 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
36 %7 = load <8 x i16>, <8 x i16>* %6, align 16
37 store <8 x i16> %7, <8 x i16>* %5, align 16
38 %8 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0
39 %9 = load <8 x i16>, <8 x i16>* %8, align 16
40 %10 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0
41 %11 = load <8 x i16>, <8 x i16>* %10, align 16
42 %12 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0
43 %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t*
44 %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32>
45 %15 = getelementptr inbounds %struct.__neon_int16x8x2_t, %struct.__neon_int16x8x2_t* %13, i32 0, i32 0
46 store <8 x i16> %14, <8 x i16>* %15
47 %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32>
48 %17 = getelementptr inbounds %struct.__neon_int16x8x2_t, %struct.__neon_int16x8x2_t* %13, i32 0, i32 1
49 store <8 x i16> %16, <8 x i16>* %17
50 %18 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0
51 %19 = bitcast %struct.int16x8x2_t* %0 to i8*
52 %20 = bitcast %struct.int16x8x2_t* %18 to i8*
53 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i32 16, i1 false)
54 %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
55 %21 = bitcast %struct.int16x8x2_t* %0 to i8*
56 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false)
57 %22 = load %struct.int16x8x2_t*, %struct.int16x8x2_t** %dst_addr, align 4
58 %23 = bitcast %struct.int16x8x2_t* %22 to i8*
59 %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
60 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false)
61 br label %return
62
63 return: ; preds = %entry
64 ret void
65 }
66
67 ; Radar 7466574
68 %struct._NSRange = type { i64 }
69
70 define void @test_memcpy_self() nounwind {
71 entry:
72 %range = alloca %struct._NSRange
73 br i1 undef, label %cond.true, label %cond.false
74
75 cond.true: ; preds = %entry
76 %tmp3 = bitcast %struct._NSRange* %range to i8*
77 %tmp4 = bitcast %struct._NSRange* %range to i8*
78 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8, i1 false)
79 ret void
80
81 cond.false: ; preds = %entry
82 ret void
83
84 ; CHECK-LABEL: @test_memcpy_self(
85 ; CHECK-NOT: alloca
86 ; CHECK: br i1
87 }
88
89 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+0
-18
test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
None ; RUN: opt < %s -scalarrepl -S | FileCheck %s
1 ; Radar 7552893
2
3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
4
5 %struct.test = type { [3 x double] }
6
7 define void @test_memcpy_self() nounwind {
8 ; CHECK-LABEL: @test_memcpy_self(
9 ; CHECK-NOT: alloca
10 ; CHECK: ret void
11 %1 = alloca %struct.test
12 %2 = bitcast %struct.test* %1 to i8*
13 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i32 4, i1 false)
14 ret void
15 }
16
17 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
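The CHECK lines above require both the alloca and the self-copy to vanish; what is expected to remain of @test_memcpy_self is essentially just the return (a sketch of the checked shape, not verbatim pass output):

define void @test_memcpy_self() nounwind {
  ; The memcpy copied the temporary onto itself, so the whole alloca folds away.
  ret void
}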
+0
-26
test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
None ; RUN: opt < %s -instcombine -S | FileCheck %s
1 ; PR9820
2
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
4 target triple = "x86_64-unknown-linux-gnu"
5
6 @func_1.l_10 = internal unnamed_addr constant [4 x i32] [i32 1, i32 0, i32 0, i32 0], align 16
7
8 define i32* @noop(i32* %p_29) nounwind readnone {
9 entry:
10 ret i32* %p_29
11 }
12
13 define i32 @main() nounwind {
14 entry:
15 %l_10 = alloca [4 x i32], align 16
16 %tmp = bitcast [4 x i32]* %l_10 to i8*
17 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i32 16, i1 false)
18 ; CHECK: call void @llvm.memcpy
19 %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %l_10, i64 0, i64 0
20 %call = call i32* @noop(i32* %arrayidx)
21 store i32 0, i32* %call
22 ret i32 0
23 }
24
25 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+0
-75
test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
None ; RUN: opt < %s -S -scalarrepl | FileCheck %s
1 ; RUN: opt < %s -S -scalarrepl-ssa | FileCheck %s
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
3 target triple = "x86_64-apple-macosx10.7.0"
4
5 %0 = type { <2 x float>, float }
6 %struct.PointC3 = type { %struct.array }
7 %struct.Point_3 = type { %struct.PointC3 }
8 %struct.array = type { [3 x float], [4 x i8] }
9
10 ; CHECK: main
11 ; CHECK-NOT: alloca
12 ; CHECK: extractelement <2 x float> zeroinitializer, i32 0
13
14 define void @main() uwtable ssp {
15 entry:
16 %ref.tmp2 = alloca %0, align 16
17 %tmpcast = bitcast %0* %ref.tmp2 to %struct.Point_3*
18 %0 = getelementptr %0, %0* %ref.tmp2, i64 0, i32 0
19 store <2 x float> zeroinitializer, <2 x float>* %0, align 16
20 %1 = getelementptr inbounds %struct.Point_3, %struct.Point_3* %tmpcast, i64 0, i32 0
21 %base.i.i.i = getelementptr inbounds %struct.PointC3, %struct.PointC3* %1, i64 0, i32 0
22 %arrayidx.i.i.i.i = getelementptr inbounds %struct.array, %struct.array* %base.i.i.i, i64 0, i32 0, i64 0
23 %tmp5.i.i = load float, float* %arrayidx.i.i.i.i, align 4
24 ret void
25 }
26
27 ; CHECK: test1
28 ; CHECK-NOT: alloca
29 ; CHECK: extractelement <2 x float> zeroinitializer, i32 0
30
31 define void @test1() uwtable ssp {
32 entry:
33 %ref.tmp2 = alloca {<2 x float>, float}, align 16
34 %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
35 %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
36 store <2 x float> zeroinitializer, <2 x float>* %0, align 16
37 %tmp5.i.i = load float, float* %tmpcast, align 4
38 ret void
39 }
40
41 ; CHECK: test2
42 ; CHECK-NOT: alloca
43 ; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> zeroinitializer, i32 0
44 ; CHECK: fadd float %[[A]], 1.000000e+00
45 ; CHECK-NOT: insertelement
46 ; CHECK-NOT: extractelement
47
48 define float @test2() uwtable ssp {
49 entry:
50 %ref.tmp2 = alloca {<2 x float>, float}, align 16
51 %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
52 %tmpcast2 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 1
53 %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
54 store <2 x float> zeroinitializer, <2 x float>* %0, align 16
55 store float 1.0, float* %tmpcast2, align 4
56 %r1 = load float, float* %tmpcast, align 4
57 %r2 = load float, float* %tmpcast2, align 4
58 %r = fadd float %r1, %r2
59 ret float %r
60 }
61
62 ; CHECK: test3
63 ; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> , i32 1
64 ; CHECK: ret float %[[A]]
65
66 define float @test3() {
67 entry:
68 %ai = alloca { <2 x float>, <2 x float> }, align 8
69 store { <2 x float>, <2 x float> } {<2 x float> , <2 x float> }, { <2 x float>, <2 x float> }* %ai, align 8
70 %tmpcast = bitcast { <2 x float>, <2 x float> }* %ai to [4 x float]*
71 %arrayidx = getelementptr inbounds [4 x float], [4 x float]* %tmpcast, i64 0, i64 3
72 %f = load float, float* %arrayidx, align 4
73 ret float %f
74 }
+0
-37
test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
None ; RUN: opt < %s -scalarrepl -S | FileCheck %s
1 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
2 target triple = "thumbv7-apple-darwin10"
3
4 ; CHECK: f
5 ; CHECK-NOT: alloca
6 ; CHECK: %[[A:[a-z0-9]*]] = and i128 undef, -16777216
7 ; CHECK: %[[B:[a-z0-9]*]] = bitcast i128 %[[A]] to <4 x float>
8 ; CHECK: %[[C:[a-z0-9]*]] = extractelement <4 x float> %[[B]], i32 0
9 ; CHECK: ret float %[[C]]
10
11 define float @f() nounwind ssp {
12 entry:
13 %a = alloca <4 x float>, align 16
14 %p = bitcast <4 x float>* %a to i8*
15 call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i32 16, i1 false)
16 %vec = load <4 x float>, <4 x float>* %a, align 8
17 %val = extractelement <4 x float> %vec, i32 0
18 ret float %val
19 }
20
21 ; CHECK: g
22 ; CHECK-NOT: alloca
23 ; CHECK: and i128
24
25 define void @g() nounwind ssp {
26 entry:
27 %a = alloca { <4 x float> }, align 16
28 %p = bitcast { <4 x float> }* %a to i8*
29 call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false)
30 %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]*
31 %arrayidx = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* %q, i32 0, i32 0
32 store <2 x float> undef, <2 x float>* %arrayidx, align 8
33