llvm.org GIT mirror llvm / 1d6c2d7
Add an AlignmentFromAssumptions Pass

This adds a ScalarEvolution-powered transformation that updates load, store and memory intrinsic pointer alignments based on invariant((a+q) & b == 0) expressions. Many of the simple cases we can handle with ValueTracking, but we still need something like this for the more complicated cases (such as those with an offset) that require some algebra. Note that gcc's __builtin_assume_aligned's optional third argument provides exactly this kind of 'misalignment' offset, for which this logic is necessary.

The primary motivation is to fix up alignments for vector loads/stores after vectorization (and unrolling). This pass is added to the optimization pipeline just after the SLP vectorizer runs (which, admittedly, does not preserve SE, although I imagine it could). Regardless, I don't think the preservation matters much in this case: SE computes lazily, and this pass won't issue any SE queries unless there are assume intrinsics, so there should be no real additional cost in the common case (SLP does preserve DT and LoopInfo).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217344 91177308-0d34-0410-b5e6-96231b3b80d8

Hal Finkel
9 changed files with 661 additions and 0 deletions.
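For context, a hypothetical C++ example of the source-level construct this pass is meant to serve: gcc's (and clang's) __builtin_assume_aligned with its optional third argument expresses an alignment together with a known misalignment offset, i.e. exactly the '(a+q) & b == 0' style invariant mentioned above. The function and values below are illustrative, not part of this commit; the builtin is expected to lower to an @llvm.assume over a masked ptrtoint comparison like the ones written out by hand in the regression test at the end of this diff.

    int readAt8(int *a) {
      // The caller promises that (char *)a - 8 is 32-byte aligned, so a itself
      // sits at offset 8 from a 32-byte boundary.
      int *p = (int *) __builtin_assume_aligned(a, 32, 8);
      return p[2];   // this load is at p + 8 bytes, i.e. at offset 16 from a
                     // 32-byte boundary, so "align 16" is provable for it
    }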
7272 void initializeArgPromotionPass(PassRegistry&);
7373 void initializeAtomicExpandPass(PassRegistry&);
7474 void initializeSampleProfileLoaderPass(PassRegistry&);
75 void initializeAlignmentFromAssumptionsPass(PassRegistry&);
7576 void initializeBarrierNoopPass(PassRegistry&);
7677 void initializeBasicAliasAnalysisPass(PassRegistry&);
7778 void initializeCallGraphWrapperPassPass(PassRegistry &);
5151 (void) llvm::createAliasAnalysisCounterPass();
5252 (void) llvm::createAliasDebugger();
5353 (void) llvm::createArgumentPromotionPass();
54 (void) llvm::createAlignmentFromAssumptionsPass();
5455 (void) llvm::createBasicAliasAnalysisPass();
5556 (void) llvm::createLibCallAliasAnalysisPass(nullptr);
5657 (void) llvm::createScalarEvolutionAliasAnalysisPass();
3535
3636 //===----------------------------------------------------------------------===//
3737 //
38 // AlignmentFromAssumptions - Use assume intrinsics to set load/store
39 // alignments.
40 //
41 FunctionPass *createAlignmentFromAssumptionsPass();
42
43 //===----------------------------------------------------------------------===//
44 //
3845 // SCCP - Sparse conditional constant propagation.
3946 //
4047 FunctionPass *createSCCPPass();
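A minimal sketch of how a client could schedule the new pass directly through the legacy pass manager of this era, using the factory function declared above; the driver function itself is hypothetical and not part of this patch.

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"

    // Hypothetical driver: run only AlignmentFromAssumptions over a module.
    static void runAlignmentFromAssumptions(llvm::Module &M) {
      llvm::legacy::PassManager PM;
      PM.add(llvm::createAlignmentFromAssumptionsPass());
      PM.run(M);
    }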
3333
3434 /** See llvm::createAggressiveDCEPass function. */
3535 void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
36
37 /** See llvm::createAlignmentFromAssumptionsPass function. */
38 void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM);
3639
3740 /** See llvm::createCFGSimplificationPass function. */
3841 void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM);
309309 if (!DisableUnrollLoops)
310310 MPM.add(createLoopUnrollPass()); // Unroll small loops
311311
312 // After vectorization and unrolling, assume intrinsics may tell us more
313 // about pointer alignments.
314 MPM.add(createAlignmentFromAssumptionsPass());
315
312316 if (!DisableUnitAtATime) {
313317 // FIXME: We shouldn't bother with this anymore.
314318 MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
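Hypothetical client code showing where this placement takes effect: any -O2/-O3 style pipeline built through PassManagerBuilder now runs the new pass after the vectorizers and the unroller, with no change on the client's side. The helper below is illustrative and assumes the 2014-era legacy pass manager API.

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    static void buildO3Pipeline(llvm::legacy::PassManager &MPM) {
      llvm::PassManagerBuilder PMB;
      PMB.OptLevel = 3;
      PMB.SLPVectorize = true;              // one of the runs this pass cleans up after
      PMB.populateModulePassManager(MPM);   // now includes AlignmentFromAssumptions
    }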
398402 // More scalar chains could be vectorized due to more alias information
399403 PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
400404
405 // After vectorization, assume intrinsics may tell us more about pointer
406 // alignments.
407 PM.add(createAlignmentFromAssumptionsPass());
408
401409 if (LoadCombine)
402410 PM.add(createLoadCombinePass());
403411
0 //===----------------------- AlignmentFromAssumptions.cpp -----------------===//
1 // Set Load/Store Alignments From Assumptions
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a ScalarEvolution-based transformation to set
11 // the alignments of load, stores and memory intrinsics based on the truth
12 // expressions of assume intrinsics. The primary motivation is to handle
13 // complex alignment assumptions that apply to vector loads and stores that
14 // appear after vectorization and unrolling.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #define AA_NAME "alignment-from-assumptions"
19 #define DEBUG_TYPE AA_NAME
20 #include "llvm/Transforms/Scalar.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/AssumptionTracker.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvm/Analysis/ScalarEvolution.h"
27 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/IR/Intrinsics.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 using namespace llvm;
37
38 STATISTIC(NumLoadAlignChanged,
39 "Number of loads changed by alignment assumptions");
40 STATISTIC(NumStoreAlignChanged,
41 "Number of stores changed by alignment assumptions");
42 STATISTIC(NumMemIntAlignChanged,
43 "Number of memory intrinsics changed by alignment assumptions");
44
45 namespace {
46 struct AlignmentFromAssumptions : public FunctionPass {
47 static char ID; // Pass identification, replacement for typeid
48 AlignmentFromAssumptions() : FunctionPass(ID) {
49 initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
50 }
51
52 bool runOnFunction(Function &F);
53
54 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
55 AU.addRequired<AssumptionTracker>();
56 AU.addRequired<ScalarEvolution>();
57 AU.addRequired<DominatorTreeWrapperPass>();
58
59 AU.setPreservesCFG();
60 AU.addPreserved<LoopInfo>();
61 AU.addPreserved<DominatorTreeWrapperPass>();
62 AU.addPreserved<ScalarEvolution>();
63 }
64
65 // For memory transfers, we need a common alignment for both the source and
66 // destination. If we have a new alignment for only one operand of a transfer
67 // instruction, save it in these maps. If we reach the other operand through
68 // another assumption later, then we may change the alignment at that point.
69 DenseMap<MemTransferInst *, unsigned> NewDestAlignments, NewSrcAlignments;
70
71 AssumptionTracker *AT;
72 ScalarEvolution *SE;
73 DominatorTree *DT;
74 const DataLayout *DL;
75
76 bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
77 const SCEV *&OffSCEV);
78 bool processAssumption(CallInst *I);
79 };
80 }
81
82 char AlignmentFromAssumptions::ID = 0;
83 static const char aip_name[] = "Alignment from assumptions";
84 INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME,
85 aip_name, false, false)
86 INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
87 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
88 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
89 INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
90 aip_name, false, false)
91
92 FunctionPass *llvm::createAlignmentFromAssumptionsPass() {
93 return new AlignmentFromAssumptions();
94 }
95
96 // Given an expression for the (constant) alignment, AlignSCEV, and an
97 // expression for the displacement between a pointer and the aligned address,
98 // DiffSCEV, compute the alignment of the displaced pointer if it can be
99 // reduced to a constant.
100 static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
101 const SCEV *AlignSCEV,
102 ScalarEvolution *SE) {
103 // DiffUnits = Diff % int64_t(Alignment)
104 const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV);
105 const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV);
106 const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV);
107
108 DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " <<
109 *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");
110
111 if (const SCEVConstant *ConstDUSCEV =
112 dyn_cast<SCEVConstant>(DiffUnitsSCEV)) {
113 int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue();
114
115 // If the displacement is an exact multiple of the alignment, then the
116 // displaced pointer has the same alignment as the aligned pointer, so
117 // return the alignment value.
118 if (!DiffUnits)
119 return (unsigned)
120 cast<SCEVConstant>(AlignSCEV)->getValue()->getSExtValue();
121
122 // If the displacement is not an exact multiple, but the remainder is a
123 // constant, then return this remainder (but only if it is a power of 2).
124 uint64_t DiffUnitsAbs = abs64(DiffUnits);
125 if (isPowerOf2_64(DiffUnitsAbs))
126 return (unsigned) DiffUnitsAbs;
127 }
128
129 return 0;
130 }
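As a concreteness check, here is a hypothetical, SCEV-free restatement of the arithmetic above for the case where both the displacement and the alignment are plain constants (the names are illustrative; the pass itself operates on SCEV expressions, so it also covers symbolic displacements):

    #include <cstdint>

    // Alignment provable for a pointer displaced by Diff bytes from an address
    // known to be Align-byte aligned, or 0 if nothing useful can be concluded.
    // This mirrors what the udiv/mul/sub sequence above computes for constants.
    static unsigned newAlignmentForConstDiff(int64_t Diff, uint64_t Align) {
      uint64_t Rem = (uint64_t)Diff % Align;   // remainder of the displacement
      if (Rem == 0)
        return (unsigned)Align;                // exact multiple: full alignment
      if ((Rem & (Rem - 1)) == 0)
        return (unsigned)Rem;                  // power-of-two remainder
      return 0;
    }
    // e.g. newAlignmentForConstDiff(  0, 32) == 32   (the @foo test below)
    //      newAlignmentForConstDiff(-16, 32) == 16   (the @foo2 test below)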
131
132 // There is an address given by an offset OffSCEV from AASCEV which has an
133 // alignment AlignSCEV. Use that information, if possible, to compute a new
134 // alignment for Ptr.
135 static unsigned getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
136 const SCEV *OffSCEV, Value *Ptr,
137 ScalarEvolution *SE) {
138 const SCEV *PtrSCEV = SE->getSCEV(Ptr);
139 const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
140
141 // What we really want to know is the overall offset to the aligned
142 // address. This address is displaced by the provided offset.
143 DiffSCEV = SE->getMinusSCEV(DiffSCEV, OffSCEV);
144
145 DEBUG(dbgs() << "AFI: alignment of " << *Ptr << " relative to " <<
146 *AlignSCEV << " and offset " << *OffSCEV <<
147 " using diff " << *DiffSCEV << "\n");
148
149 unsigned NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE);
150 DEBUG(dbgs() << "\tnew alignment: " << NewAlignment << "\n");
151
152 if (NewAlignment) {
153 return NewAlignment;
154 } else if (const SCEVAddRecExpr *DiffARSCEV =
155 dyn_cast<SCEVAddRecExpr>(DiffSCEV)) {
156 // The relative offset to the alignment assumption did not yield a constant,
157 // but we should try harder: if we assume that a is 32-byte aligned, then in
158 // for (i = 0; i < 1024; i += 4) r += a[i]; not all of the loads from a are
159 // 32-byte aligned, but instead alternate between 32 and 16-byte alignment.
160 // As a result, the new alignment will not be a constant, but can still
161 // be improved over the default (of 4) to 16.
162
163 const SCEV *DiffStartSCEV = DiffARSCEV->getStart();
164 const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE);
165
166 DEBUG(dbgs() << "\ttrying start/inc alignment using start " <<
167 *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n");
168
169 // Now compute the new alignment using the displacement to the value in the
170 // first iteration, and also the alignment using the per-iteration delta.
171 // If these are the same, then use that answer. Otherwise, use the smaller
172 // one, but only if it divides the larger one.
173 NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE);
174 unsigned NewIncAlignment = getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE);
175
176 DEBUG(dbgs() << "\tnew start alignment: " << NewAlignment << "\n");
177 DEBUG(dbgs() << "\tnew inc alignment: " << NewIncAlignment << "\n");
178
179 if (NewAlignment > NewIncAlignment) {
180 if (NewAlignment % NewIncAlignment == 0) {
181 DEBUG(dbgs() << "\tnew start/inc alignment: " <<
182 NewIncAlignment << "\n");
183 return NewIncAlignment;
184 }
185 } else if (NewIncAlignment > NewAlignment) {
186 if (NewIncAlignment % NewAlignment == 0) {
187 DEBUG(dbgs() << "\tnew start/inc alignment: " <<
188 NewAlignment << "\n");
189 return NewAlignment;
190 }
191 } else if (NewIncAlignment == NewAlignment && NewIncAlignment) {
192 DEBUG(dbgs() << "\tnew start/inc alignment: " <<
193 NewAlignment << "\n");
194 return NewAlignment;
195 }
196 }
197
198 return 0;
199 }
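A hypothetical source-level counterpart of the add-recurrence case above (the function is illustrative and not part of the patch): the base pointer is assumed 32-byte aligned, but the loop strides by four 4-byte elements, so successive loads alternate between 32-byte and 16-byte aligned addresses, and 16 is the best alignment that holds on every iteration. This is the pattern exercised by @koo in the regression test below.

    int sumEveryFourth(const int *a) {
      const int *p = (const int *) __builtin_assume_aligned(a, 32);
      int r = 0;
      for (int i = 0; i < 1024; i += 4)   // a stride of 16 bytes
        r += p[i];                        // offsets 0, 16, 32, 48, ... from p
      return r;
    }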
200
201 bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
202 Value *&AAPtr, const SCEV *&AlignSCEV,
203 const SCEV *&OffSCEV) {
204 // An alignment assume must be a statement about the least-significant
205 // bits of the pointer being zero, possibly with some offset.
206 ICmpInst *ICI = dyn_cast<ICmpInst>(I->getArgOperand(0));
207 if (!ICI)
208 return false;
209
210 // This must be an expression of the form: x & m == 0.
211 if (ICI->getPredicate() != ICmpInst::ICMP_EQ)
212 return false;
213
214 // Swap things around so that the RHS is 0.
215 Value *CmpLHS = ICI->getOperand(0);
216 Value *CmpRHS = ICI->getOperand(1);
217 const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS);
218 const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS);
219 if (CmpLHSSCEV->isZero())
220 std::swap(CmpLHS, CmpRHS);
221 else if (!CmpRHSSCEV->isZero())
222 return false;
223
224 BinaryOperator *CmpBO = dyn_cast<BinaryOperator>(CmpLHS);
225 if (!CmpBO || CmpBO->getOpcode() != Instruction::And)
226 return false;
227
228 // Swap things around so that the right operand of the and is a constant
229 // (the mask); we cannot deal with variable masks.
230 Value *AndLHS = CmpBO->getOperand(0);
231 Value *AndRHS = CmpBO->getOperand(1);
232 const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS);
233 const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS);
234 if (isa<SCEVConstant>(AndLHSSCEV)) {
235 std::swap(AndLHS, AndRHS);
236 std::swap(AndLHSSCEV, AndRHSSCEV);
237 }
238
239 const SCEVConstant *MaskSCEV = dyn_cast<SCEVConstant>(AndRHSSCEV);
240 if (!MaskSCEV)
241 return false;
242
243 // The mask must have some trailing ones (otherwise the condition is
244 // trivial and tells us nothing about the alignment of the left operand).
245 unsigned TrailingOnes =
246 MaskSCEV->getValue()->getValue().countTrailingOnes();
247 if (!TrailingOnes)
248 return false;
249
250 // Cap the alignment at the maximum with which LLVM can deal (and make sure
251 // we don't overflow the shift).
252 uint64_t Alignment;
253 TrailingOnes = std::min(TrailingOnes,
254 unsigned(sizeof(unsigned) * CHAR_BIT - 1));
255 Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment);
256
257 Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext());
258 AlignSCEV = SE->getConstant(Int64Ty, Alignment);
259
260 // The LHS might be a ptrtoint instruction, or it might be the pointer
261 // with an offset.
262 AAPtr = nullptr;
263 OffSCEV = nullptr;
264 if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
265 AAPtr = PToI->getPointerOperand();
266 OffSCEV = SE->getConstant(Int64Ty, 0);
267 } else if (const SCEVAddExpr* AndLHSAddSCEV =
268 dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
269 // Try to find the ptrtoint; subtract it and the rest is the offset.
270 for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(),
271 JE = AndLHSAddSCEV->op_end(); J != JE; ++J)
272 if (const SCEVUnknown *OpUnk = dyn_cast<SCEVUnknown>(*J))
273 if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(OpUnk->getValue())) {
274 AAPtr = PToI->getPointerOperand();
275 OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J);
276 break;
277 }
278 }
279
280 if (!AAPtr)
281 return false;
282
283 // Sign extend the offset to 64 bits (so that it is like all of the other
284 // expressions).
285 unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
286 if (OffSCEVBits < 64)
287 OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
288 else if (OffSCEVBits > 64)
289 return false;
290
291 AAPtr = AAPtr->stripPointerCasts();
292 return true;
293 }
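A small hypothetical sketch of the mask-to-alignment step above, assuming the mask has already been extracted as a plain integer: a mask of 31 (0b11111) has five trailing ones, so 'x & 31 == 0' proves 2^5 = 32-byte alignment. The real code additionally caps the result at Value::MaximumAlignment.

    #include <algorithm>
    #include <cstdint>

    static uint64_t alignmentFromMask(uint64_t Mask) {
      unsigned TrailingOnes = 0;
      while (Mask & 1) {            // count the trailing one bits of the mask
        ++TrailingOnes;
        Mask >>= 1;
      }
      if (TrailingOnes == 0)
        return 0;                   // trivial condition, proves no alignment
      TrailingOnes = std::min(TrailingOnes, 63u);   // keep the shift in range
      return UINT64_C(1) << TrailingOnes;
    }
    // alignmentFromMask(31) == 32; alignmentFromMask(127) == 128 (see @moo2)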
294
295 bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
296 Value *AAPtr;
297 const SCEV *AlignSCEV, *OffSCEV;
298 if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV))
299 return false;
300
301 const SCEV *AASCEV = SE->getSCEV(AAPtr);
302
303 // Apply the assumption to all other users of the specified pointer.
304 SmallPtrSet<Instruction *, 32> Visited;
305 SmallVector<Instruction *, 16> WorkList;
306 for (User *J : AAPtr->users()) {
307 if (J == ACall)
308 continue;
309
310 if (Instruction *K = dyn_cast<Instruction>(J))
311 if (isValidAssumeForContext(ACall, K, DL, DT))
312 WorkList.push_back(K);
313 }
314
315 while (!WorkList.empty()) {
316 Instruction *J = WorkList.pop_back_val();
317
318 if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
319 unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
320 LI->getPointerOperand(), SE);
321
322 if (NewAlignment > LI->getAlignment()) {
323 LI->setAlignment(NewAlignment);
324 ++NumLoadAlignChanged;
325 }
326 } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
327 unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
328 SI->getPointerOperand(), SE);
329
330 if (NewAlignment > SI->getAlignment()) {
331 SI->setAlignment(NewAlignment);
332 ++NumStoreAlignChanged;
333 }
334 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
335 unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
336 MI->getDest(), SE);
337
338 // For memory transfers, we need a common alignment for both the
339 // source and destination. If we have a new alignment for this
340 // instruction, but only for one operand, save it. If we reach the
341 // other operand through another assumption later, then we may
342 // change the alignment at that point.
343 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
344 unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
345 MTI->getSource(), SE);
346
347 DenseMap<MemTransferInst *, unsigned>::iterator DI =
348 NewDestAlignments.find(MTI);
349 unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ?
350 0 : DI->second;
351
352 DenseMap<MemTransferInst *, unsigned>::iterator SI =
353 NewSrcAlignments.find(MTI);
354 unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ?
355 0 : SI->second;
356
357 DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " <<
358 AltDestAlignment << " " << NewSrcAlignment <<
359 " " << AltSrcAlignment << "\n");
360
361 // Of these four alignments, pick the largest possible...
362 unsigned NewAlignment = 0;
363 if (NewDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
364 NewAlignment = std::max(NewAlignment, NewDestAlignment);
365 if (AltDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
366 NewAlignment = std::max(NewAlignment, AltDestAlignment);
367 if (NewSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
368 NewAlignment = std::max(NewAlignment, NewSrcAlignment);
369 if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
370 NewAlignment = std::max(NewAlignment, AltSrcAlignment);
371
372 if (NewAlignment > MI->getAlignment()) {
373 MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
374 MI->getParent()->getContext()), NewAlignment));
375 ++NumMemIntAlignChanged;
376 }
377
378 NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment));
379 NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment));
380 } else if (NewDestAlignment > MI->getAlignment()) {
381 assert((!isa<MemIntrinsic>(MI) || isa<MemSetInst>(MI)) &&
382 "Unknown memory intrinsic");
383
384 MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
385 MI->getParent()->getContext()), NewDestAlignment));
386 ++NumMemIntAlignChanged;
387 }
388 }
389
390 // Now that we've updated that use of the pointer, look for other uses of
391 // the pointer to update.
392 Visited.insert(J);
393 for (User *UJ : J->users()) {
394 Instruction *K = cast(UJ);
395 if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT))
396 WorkList.push_back(K);
397 }
398 }
399
400 return true;
401 }
402
403 bool AlignmentFromAssumptions::runOnFunction(Function &F) {
404 bool Changed = false;
405 AT = &getAnalysis<AssumptionTracker>();
406 SE = &getAnalysis<ScalarEvolution>();
407 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
408 DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
409 DL = DLP ? &DLP->getDataLayout() : nullptr;
410
411 NewDestAlignments.clear();
412 NewSrcAlignments.clear();
413
414 for (auto &I : AT->assumptions(&F))
415 Changed |= processAssumption(I);
416
417 return Changed;
418 }
419
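For completeness, a hypothetical sketch of producing the pattern this pass consumes from C++ with the 2014-era IRBuilder API: it emits assume((ptrtoint(Ptr) + Offset) & (Alignment - 1) == 0), which is the shape extractAlignmentInfo matches and which the regression test below writes out by hand.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Illustrative helper, not part of this patch. Alignment must be a power
    // of two; Offset is the known misalignment in bytes.
    static void emitAlignmentAssumption(IRBuilder<> &B, Module *M, Value *Ptr,
                                        uint64_t Alignment, uint64_t Offset) {
      Value *PtrInt = B.CreatePtrToInt(Ptr, B.getInt64Ty());
      Value *OffsetPtr = B.CreateAdd(PtrInt, B.getInt64(Offset));
      Value *Masked = B.CreateAnd(OffsetPtr, B.getInt64(Alignment - 1));
      Value *Cond = B.CreateICmpEQ(Masked, B.getInt64(0));
      B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::assume), Cond);
    }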
0 add_llvm_library(LLVMScalarOpts
11 ADCE.cpp
2 AlignmentFromAssumptions.cpp
23 ConstantHoisting.cpp
34 ConstantProp.cpp
45 CorrelatedValuePropagation.cpp
2727 /// ScalarOpts library.
2828 void llvm::initializeScalarOpts(PassRegistry &Registry) {
2929 initializeADCEPass(Registry);
30 initializeAlignmentFromAssumptionsPass(Registry);
3031 initializeSampleProfileLoaderPass(Registry);
3132 initializeConstantHoistingPass(Registry);
3233 initializeConstantPropagationPass(Registry);
7778 unwrap(PM)->add(createAggressiveDCEPass());
7879 }
7980
81 void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
82 unwrap(PM)->add(createAlignmentFromAssumptionsPass());
83 }
84
8085 void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
8186 unwrap(PM)->add(createCFGSimplificationPass());
8287 }
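A minimal sketch of exercising the new C binding; the wrapper function is hypothetical, while LLVMCreatePassManager, LLVMRunPassManager and LLVMDisposePassManager are pre-existing llvm-c entry points for the legacy pass manager.

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/Scalar.h"

    static void runAlignmentPassViaCAPI(LLVMModuleRef M) {
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMAddAlignmentFromAssumptionsPass(PM);   // the binding added above
      LLVMRunPassManager(PM, M);
      LLVMDisposePassManager(PM);
    }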
0 target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
1 ; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
2
3 define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
4 entry:
5 %ptrint = ptrtoint i32* %a to i64
6 %maskedptr = and i64 %ptrint, 31
7 %maskcond = icmp eq i64 %maskedptr, 0
8 tail call void @llvm.assume(i1 %maskcond)
9 %0 = load i32* %a, align 4
10 ret i32 %0
11
12 ; CHECK-LABEL: @foo
13 ; CHECK: load i32* {{[^,]+}}, align 32
14 ; CHECK: ret i32
15 }
16
17 define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
18 entry:
19 %ptrint = ptrtoint i32* %a to i64
20 %offsetptr = add i64 %ptrint, 24
21 %maskedptr = and i64 %offsetptr, 31
22 %maskcond = icmp eq i64 %maskedptr, 0
23 tail call void @llvm.assume(i1 %maskcond)
24 %arrayidx = getelementptr inbounds i32* %a, i64 2
25 %0 = load i32* %arrayidx, align 4
26 ret i32 %0
27
28 ; CHECK-LABEL: @foo2
29 ; CHECK: load i32* {{[^,]+}}, align 16
30 ; CHECK: ret i32
31 }
32
33 define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
34 entry:
35 %ptrint = ptrtoint i32* %a to i64
36 %offsetptr = add i64 %ptrint, 28
37 %maskedptr = and i64 %offsetptr, 31
38 %maskcond = icmp eq i64 %maskedptr, 0
39 tail call void @llvm.assume(i1 %maskcond)
40 %arrayidx = getelementptr inbounds i32* %a, i64 -1
41 %0 = load i32* %arrayidx, align 4
42 ret i32 %0
43
44 ; CHECK-LABEL: @foo2a
45 ; CHECK: load i32* {{[^,]+}}, align 32
46 ; CHECK: ret i32
47 }
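The CHECK alignments in @foo2 and @foo2a follow from the same modular arithmetic the pass performs; a small self-contained check of those numbers (hypothetical, and only covering the constant-displacement case):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Alignment provable for a load displaced by Diff bytes from an address
      // asserted to be Align-byte aligned (constant case only).
      auto provableAlign = [](int64_t Diff, uint64_t Align) -> uint64_t {
        uint64_t Rem = (uint64_t)Diff % Align;
        if (Rem == 0) return Align;
        return (Rem & (Rem - 1)) == 0 ? Rem : 0;
      };
      assert(provableAlign(8 - 24, 32) == 16);   // @foo2:  load at +8, assume at +24
      assert(provableAlign(-4 - 28, 32) == 32);  // @foo2a: load at -4, assume at +28
      assert(provableAlign(0 - 0, 32) == 32);    // @foo:   load at the pointer itself
      return 0;
    }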
48
49 define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
50 entry:
51 %ptrint = ptrtoint i32* %a to i64
52 %maskedptr = and i64 %ptrint, 31
53 %maskcond = icmp eq i64 %maskedptr, 0
54 tail call void @llvm.assume(i1 %maskcond)
55 %0 = load i32* %a, align 4
56 ret i32 %0
57
58 ; CHECK-LABEL: @goo
59 ; CHECK: load i32* {{[^,]+}}, align 32
60 ; CHECK: ret i32
61 }
62
63 define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
64 entry:
65 %ptrint = ptrtoint i32* %a to i64
66 %maskedptr = and i64 %ptrint, 31
67 %maskcond = icmp eq i64 %maskedptr, 0
68 tail call void @llvm.assume(i1 %maskcond)
69 br label %for.body
70
71 for.body: ; preds = %entry, %for.body
72 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
73 %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
74 %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
75 %0 = load i32* %arrayidx, align 4
76 %add = add nsw i32 %0, %r.06
77 %indvars.iv.next = add i64 %indvars.iv, 8
78 %1 = trunc i64 %indvars.iv.next to i32
79 %cmp = icmp slt i32 %1, 2048
80 br i1 %cmp, label %for.body, label %for.end
81
82 for.end: ; preds = %for.body
83 %add.lcssa = phi i32 [ %add, %for.body ]
84 ret i32 %add.lcssa
85
86 ; CHECK-LABEL: @hoo
87 ; CHECK: load i32* %arrayidx, align 32
88 ; CHECK: ret i32 %add.lcssa
89 }
90
91 define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
92 entry:
93 %ptrint = ptrtoint i32* %a to i64
94 %maskedptr = and i64 %ptrint, 31
95 %maskcond = icmp eq i64 %maskedptr, 0
96 tail call void @llvm.assume(i1 %maskcond)
97 br label %for.body
98
99 for.body: ; preds = %entry, %for.body
100 %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
101 %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
102 %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
103 %0 = load i32* %arrayidx, align 4
104 %add = add nsw i32 %0, %r.06
105 %indvars.iv.next = add i64 %indvars.iv, 8
106 %1 = trunc i64 %indvars.iv.next to i32
107 %cmp = icmp slt i32 %1, 2048
108 br i1 %cmp, label %for.body, label %for.end
109
110 for.end: ; preds = %for.body
111 %add.lcssa = phi i32 [ %add, %for.body ]
112 ret i32 %add.lcssa
113
114 ; CHECK-LABEL: @joo
115 ; CHECK: load i32* %arrayidx, align 16
116 ; CHECK: ret i32 %add.lcssa
117 }
118
119 define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
120 entry:
121 %ptrint = ptrtoint i32* %a to i64
122 %maskedptr = and i64 %ptrint, 31
123 %maskcond = icmp eq i64 %maskedptr, 0
124 tail call void @llvm.assume(i1 %maskcond)
125 br label %for.body
126
127 for.body: ; preds = %entry, %for.body
128 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
129 %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
130 %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
131 %0 = load i32* %arrayidx, align 4
132 %add = add nsw i32 %0, %r.06
133 %indvars.iv.next = add i64 %indvars.iv, 4
134 %1 = trunc i64 %indvars.iv.next to i32
135 %cmp = icmp slt i32 %1, 2048
136 br i1 %cmp, label %for.body, label %for.end
137
138 for.end: ; preds = %for.body
139 %add.lcssa = phi i32 [ %add, %for.body ]
140 ret i32 %add.lcssa
141
142 ; CHECK-LABEL: @koo
143 ; CHECK: load i32* %arrayidx, align 16
144 ; CHECK: ret i32 %add.lcssa
145 }
146
147 define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
148 entry:
149 %ptrint = ptrtoint i32* %a to i64
150 %maskedptr = and i64 %ptrint, 31
151 %maskcond = icmp eq i64 %maskedptr, 0
152 tail call void @llvm.assume(i1 %maskcond)
153 br label %for.body
154
155 for.body: ; preds = %entry, %for.body
156 %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
157 %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
158 %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
159 %0 = load i32* %arrayidx, align 4
160 %add = add nsw i32 %0, %r.06
161 %indvars.iv.next = add i64 %indvars.iv, 4
162 %1 = trunc i64 %indvars.iv.next to i32
163 %cmp = icmp slt i32 %1, 2048
164 br i1 %cmp, label %for.body, label %for.end
165
166 for.end: ; preds = %for.body
167 %add.lcssa = phi i32 [ %add, %for.body ]
168 ret i32 %add.lcssa
169
170 ; CHECK-LABEL: @koo2
171 ; CHECK: load i32* %arrayidx, align 16
172 ; CHECK: ret i32 %add.lcssa
173 }
174
175 define i32 @moo(i32* nocapture %a) nounwind uwtable {
176 entry:
177 %ptrint = ptrtoint i32* %a to i64
178 %maskedptr = and i64 %ptrint, 31
179 %maskcond = icmp eq i64 %maskedptr, 0
180 tail call void @llvm.assume(i1 %maskcond)
181 %0 = bitcast i32* %a to i8*
182 tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
183 ret i32 undef
184
185 ; CHECK-LABEL: @moo
186 ; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
187 ; CHECK: ret i32 undef
188 }
189
190 define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
191 entry:
192 %ptrint = ptrtoint i32* %a to i64
193 %maskedptr = and i64 %ptrint, 31
194 %maskcond = icmp eq i64 %maskedptr, 0
195 tail call void @llvm.assume(i1 %maskcond)
196 %ptrint1 = ptrtoint i32* %b to i64
197 %maskedptr3 = and i64 %ptrint1, 127
198 %maskcond4 = icmp eq i64 %maskedptr3, 0
199 tail call void @llvm.assume(i1 %maskcond4)
200 %0 = bitcast i32* %a to i8*
201 %1 = bitcast i32* %b to i8*
202 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
203 ret i32 undef
204
205 ; CHECK-LABEL: @moo2
206 ; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
207 ; CHECK: ret i32 undef
208 }
209
210 declare void @llvm.assume(i1) nounwind
211
212 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
213 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
214