llvm.org GIT mirror llvm / e1ae008
Remove the LoadCombine pass. It was never enabled and is unsupported. Based on discussions with the author on mailing lists.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306067 91177308-0d34-0410-b5e6-96231b3b80d8
Eric Christopher · 2 years ago
13 changed file(s) with 0 addition(s) and 674 deletion(s).
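For context, the removed pass rewrote groups of adjacent, non-overlapping integer loads from the same base pointer into a single wider load and then extracted the original values from it. The sketch below is adapted from the @"2xi16_i32" case in test/Transforms/LoadCombine/load-combine.ll (deleted further down in this diff); the "after" function is an illustrative approximation of what -load-combine followed by -instcombine cleanup produces, as that test's RUN line exercises, not verbatim tool output.

; Before: two adjacent i16 loads manually assembled into an i32.
define i32 @two_i16_to_i32(i16* %x) {
  %lo = load i16, i16* %x, align 2
  %hi.addr = getelementptr inbounds i16, i16* %x, i64 1
  %hi = load i16, i16* %hi.addr, align 2
  %hi.ext = zext i16 %hi to i32
  %hi.shl = shl nuw i32 %hi.ext, 16
  %lo.ext = zext i16 %lo to i32
  %res = or i32 %hi.shl, %lo.ext
  ret i32 %res
}

; After -load-combine -instcombine (approximate): one i32 load replaces the
; pair, and the zext/shl/or chain folds away once both halves come from the
; combined load.
define i32 @two_i16_to_i32_combined(i16* %x) {
  %p = bitcast i16* %x to i32*
  %wide = load i32, i32* %p, align 2
  ret i32 %wide
}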
191191 void initializeLiveRegMatrixPass(PassRegistry&);
192192 void initializeLiveStacksPass(PassRegistry&);
193193 void initializeLiveVariablesPass(PassRegistry&);
194 void initializeLoadCombinePass(PassRegistry&);
195194 void initializeLoadStoreVectorizerPass(PassRegistry&);
196195 void initializeLoaderPassPass(PassRegistry&);
197196 void initializeLocalStackSlotPassPass(PassRegistry&);
148148 bool SLPVectorize;
149149 bool LoopVectorize;
150150 bool RerollLoops;
151 bool LoadCombine;
152151 bool NewGVN;
153152 bool DisableGVNLoadPRE;
154153 bool VerifyInput;
486486
487487 //===----------------------------------------------------------------------===//
488488 //
489 // LoadCombine - Combine loads into bigger loads.
490 //
491 BasicBlockPass *createLoadCombinePass();
492
493 //===----------------------------------------------------------------------===//
494 //
495489 // StraightLineStrengthReduce - This pass strength-reduces some certain
496490 // instruction patterns in straight-line code.
497491 //
922922 MainFPM.add(AlignmentFromAssumptionsPass());
923923 #endif
924924
925 // FIXME: Conditionally run LoadCombine here, after it's ported
926 // (in case we still have this pass, given its questionable usefulness).
927
928925 // FIXME: add peephole extensions to the PM here.
929926 MainFPM.addPass(InstCombinePass());
930927 MainFPM.addPass(JumpThreadingPass());
7171 RunLoopRerolling("reroll-loops", cl::Hidden,
7272 cl::desc("Run the loop rerolling pass"));
7373
74 static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
75 cl::Hidden,
76 cl::desc("Run the load combining pass"));
77
7874 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
7975 cl::desc("Run the NewGVN pass"));
8076
173169 SLPVectorize = RunSLPVectorization;
174170 LoopVectorize = RunLoopVectorization;
175171 RerollLoops = RunLoopRerolling;
176 LoadCombine = RunLoadCombine;
177172 NewGVN = RunNewGVN;
178173 DisableGVNLoadPRE = false;
179174 VerifyInput = false;
405400 MPM.add(createLoopUnrollPass(OptLevel));
406401 }
407402 }
408
409 if (LoadCombine)
410 MPM.add(createLoadCombinePass());
411403
412404 MPM.add(createAggressiveDCEPass()); // Delete dead instructions
413405 MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
849841 // alignments.
850842 PM.add(createAlignmentFromAssumptionsPass());
851843
852 if (LoadCombine)
853 PM.add(createLoadCombinePass());
854
855844 // Cleanup and simplify the code after the scalar optimizations.
856845 addInstructionCombiningPass(PM);
857846 addExtensionsToPM(EP_Peephole, PM);
2121 LICM.cpp
2222 LoopAccessAnalysisPrinter.cpp
2323 LoopSink.cpp
24 LoadCombine.cpp
2524 LoopDeletion.cpp
2625 LoopDataPrefetch.cpp
2726 LoopDistribute.cpp
+0 -295  lib/Transforms/Scalar/LoadCombine.cpp
None //===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This transformation combines adjacent loads.
10 ///
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Analysis/AliasAnalysis.h"
16 #include "llvm/Analysis/AliasSetTracker.h"
17 #include "llvm/Analysis/GlobalsModRef.h"
18 #include "llvm/Analysis/TargetFolder.h"
19 #include "llvm/IR/DataLayout.h"
20 #include "llvm/IR/Dominators.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Pass.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/MathExtras.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Transforms/Scalar.h"
30
31 using namespace llvm;
32
33 #define DEBUG_TYPE "load-combine"
34
35 STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
36 STATISTIC(NumLoadsCombined, "Number of loads combined");
37
38 #define LDCOMBINE_NAME "Combine Adjacent Loads"
39
40 namespace {
41 struct PointerOffsetPair {
42 Value *Pointer;
43 APInt Offset;
44 };
45
46 struct LoadPOPPair {
47 LoadInst *Load;
48 PointerOffsetPair POP;
49 /// \brief The new load needs to be created before the first load in IR order.
50 unsigned InsertOrder;
51 };
52
53 class LoadCombine : public BasicBlockPass {
54 LLVMContext *C;
55 AliasAnalysis *AA;
56 DominatorTree *DT;
57
58 public:
59 LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
60 initializeLoadCombinePass(*PassRegistry::getPassRegistry());
61 }
62
63 using llvm::Pass::doInitialization;
64 bool doInitialization(Function &) override;
65 bool runOnBasicBlock(BasicBlock &BB) override;
66 void getAnalysisUsage(AnalysisUsage &AU) const override {
67 AU.setPreservesCFG();
68 AU.addRequired<AAResultsWrapperPass>();
69 AU.addRequired<DominatorTreeWrapperPass>();
70 AU.addPreserved<GlobalsAAWrapperPass>();
71 }
72
73 StringRef getPassName() const override { return LDCOMBINE_NAME; }
74 static char ID;
75
76 typedef IRBuilder<TargetFolder> BuilderTy;
77
78 private:
79 BuilderTy *Builder;
80
81 PointerOffsetPair getPointerOffsetPair(LoadInst &);
82 bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
83 bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
84 bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
85 };
86 }
87
88 bool LoadCombine::doInitialization(Function &F) {
89 DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
90 C = &F.getContext();
91 return true;
92 }
93
94 PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
95 auto &DL = LI.getModule()->getDataLayout();
96
97 PointerOffsetPair POP;
98 POP.Pointer = LI.getPointerOperand();
99 unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace());
100 POP.Offset = APInt(BitWidth, 0);
101
102 while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
103 if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
104 APInt LastOffset = POP.Offset;
105 if (!GEP->accumulateConstantOffset(DL, POP.Offset)) {
106 // Can't handle GEPs with variable indices.
107 POP.Offset = LastOffset;
108 return POP;
109 }
110 POP.Pointer = GEP->getPointerOperand();
111 } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) {
112 POP.Pointer = BC->getOperand(0);
113 }
114 }
115 return POP;
116 }
117
118 bool LoadCombine::combineLoads(
119 DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
120 bool Combined = false;
121 for (auto &Loads : LoadMap) {
122 if (Loads.second.size() < 2)
123 continue;
124 std::sort(Loads.second.begin(), Loads.second.end(),
125 [](const LoadPOPPair &A, const LoadPOPPair &B) {
126 return A.POP.Offset.slt(B.POP.Offset);
127 });
128 if (aggregateLoads(Loads.second))
129 Combined = true;
130 }
131 return Combined;
132 }
133
134 /// \brief Try to aggregate loads from a sorted list of loads to be combined.
135 ///
136 /// It is guaranteed that no writes occur between any of the loads. All loads
137 /// have the same base pointer. There are at least two loads.
138 bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
139 assert(Loads.size() >= 2 && "Insufficient loads!");
140 LoadInst *BaseLoad = nullptr;
141 SmallVector<LoadPOPPair, 8> AggregateLoads;
142 bool Combined = false;
143 bool ValidPrevOffset = false;
144 APInt PrevOffset;
145 uint64_t PrevSize = 0;
146 for (auto &L : Loads) {
147 if (ValidPrevOffset == false) {
148 BaseLoad = L.Load;
149 PrevOffset = L.POP.Offset;
150 PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
151 L.Load->getType());
152 AggregateLoads.push_back(L);
153 ValidPrevOffset = true;
154 continue;
155 }
156 if (L.Load->getAlignment() > BaseLoad->getAlignment())
157 continue;
158 APInt PrevEnd = PrevOffset + PrevSize;
159 if (L.POP.Offset.sgt(PrevEnd)) {
160 // No other load will be combinable
161 if (combineLoads(AggregateLoads))
162 Combined = true;
163 AggregateLoads.clear();
164 ValidPrevOffset = false;
165 continue;
166 }
167 if (L.POP.Offset != PrevEnd)
168 // This load is offset less than the size of the last load.
169 // FIXME: We may want to handle this case.
170 continue;
171 PrevOffset = L.POP.Offset;
172 PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
173 L.Load->getType());
174 AggregateLoads.push_back(L);
175 }
176 if (combineLoads(AggregateLoads))
177 Combined = true;
178 return Combined;
179 }
180
181 /// \brief Given a list of combinable loads, combine the maximum number of them.
182 bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
183 // Remove loads from the end while the size is not a power of 2.
184 unsigned TotalSize = 0;
185 for (const auto &L : Loads)
186 TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
187 while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
188 TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
189 if (Loads.size() < 2)
190 return false;
191
192 DEBUG({
193 dbgs() << "***** Combining Loads ******\n";
194 for (const auto &L : Loads) {
195 dbgs() << L.POP.Offset << ": " << *L.Load << "\n";
196 }
197 });
198
199 // Find first load. This is where we put the new load.
200 LoadPOPPair FirstLP;
201 FirstLP.InsertOrder = -1u;
202 for (const auto &L : Loads)
203 if (L.InsertOrder < FirstLP.InsertOrder)
204 FirstLP = L;
205
206 unsigned AddressSpace =
207 FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
208
209 Builder->SetInsertPoint(FirstLP.Load);
210 Value *Ptr = Builder->CreateConstGEP1_64(
211 Builder->CreatePointerCast(Loads[0].POP.Pointer,
212 Builder->getInt8PtrTy(AddressSpace)),
213 Loads[0].POP.Offset.getSExtValue());
214 LoadInst *NewLoad = new LoadInst(
215 Builder->CreatePointerCast(
216 Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
217 Ptr->getType()->getPointerAddressSpace())),
218 Twine(Loads[0].Load->getName()) + ".combined", false,
219 Loads[0].Load->getAlignment(), FirstLP.Load);
220
221 for (const auto &L : Loads) {
222 Builder->SetInsertPoint(L.Load);
223 Value *V = Builder->CreateExtractInteger(
224 L.Load->getModule()->getDataLayout(), NewLoad,
225 cast<IntegerType>(L.Load->getType()),
226 (L.POP.Offset - Loads[0].POP.Offset).getZExtValue(), "combine.extract");
227 L.Load->replaceAllUsesWith(V);
228 }
229
230 NumLoadsCombined += Loads.size();
231 return true;
232 }
233
234 bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
235 if (skipBasicBlock(BB))
236 return false;
237
238 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
239 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
240
241 // Skip analysing dead blocks (not forward reachable from function entry).
242 if (!DT->isReachableFromEntry(&BB)) {
243 DEBUG(dbgs() << "LC: skipping unreachable " << BB.getName() <<
244 " in " << BB.getParent()->getName() << "\n");
245 return false;
246 }
247
248 IRBuilder<TargetFolder> TheBuilder(
249 BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
250 Builder = &TheBuilder;
251
252 DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
253 AliasSetTracker AST(*AA);
254
255 bool Combined = false;
256 unsigned Index = 0;
257 for (auto &I : BB) {
258 if (I.mayThrow() || AST.containsUnknown(&I)) {
259 if (combineLoads(LoadMap))
260 Combined = true;
261 LoadMap.clear();
262 AST.clear();
263 continue;
264 }
265 if (I.mayWriteToMemory()) {
266 AST.add(&I);
267 continue;
268 }
269 LoadInst *LI = dyn_cast<LoadInst>(&I);
270 if (!LI)
271 continue;
272 ++NumLoadsAnalyzed;
273 if (!LI->isSimple() || !LI->getType()->isIntegerTy())
274 continue;
275 auto POP = getPointerOffsetPair(*LI);
276 if (!POP.Pointer)
277 continue;
278 LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++});
279 AST.add(LI);
280 }
281 if (combineLoads(LoadMap))
282 Combined = true;
283 return Combined;
284 }
285
286 char LoadCombine::ID = 0;
287
288 BasicBlockPass *llvm::createLoadCombinePass() {
289 return new LoadCombine();
290 }
291
292 INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
293 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
294 INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
9090 initializeSeparateConstOffsetFromGEPPass(Registry);
9191 initializeSpeculativeExecutionLegacyPassPass(Registry);
9292 initializeStraightLineStrengthReducePass(Registry);
93 initializeLoadCombinePass(Registry);
9493 initializePlaceBackedgeSafepointsImplPass(Registry);
9594 initializePlaceSafepointsPass(Registry);
9695 initializeFloat2IntLegacyPassPass(Registry);
+0 -39  test/Transforms/LoadCombine/deadcode.ll
None ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt -load-combine -S < %s | FileCheck %s
2
3 ; It has been detected that dead loops like the one in this test case can be
4 ; created by -jump-threading (it was detected by a csmith generated program).
5 ;
6 ; According to -verify this is valid input (even if it could be discussed if
7 ; the dead loop really satisfies SSA form).
8 ;
9 ; The problem found was that the -load-combine pass ends up in an infinite loop
10 ; when analysing the 'bb1' basic block.
11 define void @test1() {
12 ; CHECK-LABEL: @test1(
13 ; CHECK-NEXT: ret void
14 ; CHECK: bb1:
15 ; CHECK-NEXT: [[_TMP4:%.*]] = load i16, i16* [[_TMP10:%.*]], align 1
16 ; CHECK-NEXT: [[_TMP10]] = getelementptr i16, i16* [[_TMP10]], i16 1
17 ; CHECK-NEXT: br label [[BB1:%.*]]
18 ; CHECK: bb2:
19 ; CHECK-NEXT: [[_TMP7:%.*]] = load i16, i16* [[_TMP12:%.*]], align 1
20 ; CHECK-NEXT: [[_TMP12]] = getelementptr i16, i16* [[_TMP12]], i16 1
21 ; CHECK-NEXT: br label [[BB2:%.*]]
22 ;
23 ret void
24
25 bb1:
26 %_tmp4 = load i16, i16* %_tmp10, align 1
27 %_tmp10 = getelementptr i16, i16* %_tmp10, i16 1
28 br label %bb1
29
30 ; A second basic block. Running the test with -debug-pass=Executions shows
31 ; that we only run the Dominator Tree Construction one time for each function,
32 ; also when having multiple basic blocks in the function.
33 bb2:
34 %_tmp7 = load i16, i16* %_tmp12, align 1
35 %_tmp12 = getelementptr i16, i16* %_tmp12, i16 1
36 br label %bb2
37
38 }
+0 -63  test/Transforms/LoadCombine/load-combine-aa.ll
None ; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3
4 define i64 @test1(i32* nocapture readonly noalias %a, i32* nocapture readonly noalias %b) {
5 ; CHECK-LABEL: @test1
6
7 ; CHECK: load i64, i64*
8 ; CHECK: ret i64
9
10 %load1 = load i32, i32* %a, align 4
11 %conv = zext i32 %load1 to i64
12 %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
13 store i32 %load1, i32* %b, align 4
14 %load2 = load i32, i32* %arrayidx1, align 4
15 %conv2 = zext i32 %load2 to i64
16 %shl = shl nuw i64 %conv2, 32
17 %add = or i64 %shl, %conv
18 ret i64 %add
19 }
20
21 define i64 @test2(i32* nocapture readonly %a, i32* nocapture readonly %b) {
22 ; CHECK-LABEL: @test2
23
24 ; CHECK-NOT: load i64
25 ; CHECK: load i32, i32*
26 ; CHECK: load i32, i32*
27 ; CHECK: ret i64
28
29 %load1 = load i32, i32* %a, align 4
30 %conv = zext i32 %load1 to i64
31 %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
32 store i32 %load1, i32* %b, align 4
33 %load2 = load i32, i32* %arrayidx1, align 4
34 %conv2 = zext i32 %load2 to i64
35 %shl = shl nuw i64 %conv2, 32
36 %add = or i64 %shl, %conv
37 ret i64 %add
38 }
39
40 %rec11 = type { i16, i16, i16 }
41 @str = global %rec11 { i16 1, i16 2, i16 3 }
42
43 ; PR31517 - Check that loads which span an aliasing store are not combined.
44 define i16 @test3() {
45 ; CHECK-LABEL: @test3
46
47 ; CHECK-NOT: load i32
48 ; CHECK: load i16, i16*
49 ; CHECK: store i16
50 ; CHECK: load i16, i16*
51 ; CHECK: ret i16
52
53 %_tmp9 = getelementptr %rec11, %rec11* @str, i16 0, i32 1
54 %_tmp10 = load i16, i16* %_tmp9
55 %_tmp12 = getelementptr %rec11, %rec11* @str, i16 0, i32 0
56 store i16 %_tmp10, i16* %_tmp12
57 %_tmp13 = getelementptr %rec11, %rec11* @str, i16 0, i32 0
58 %_tmp14 = load i16, i16* %_tmp13
59 %_tmp15 = icmp eq i16 %_tmp14, 3
60 %_tmp16 = select i1 %_tmp15, i16 1, i16 0
61 ret i16 %_tmp16
62 }
+0 -44  test/Transforms/LoadCombine/load-combine-assume.ll
None ; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3
4 declare void @llvm.assume(i1) nounwind
5
6 ; 'load' before the 'call' gets optimized:
7 define i64 @test1(i32* nocapture readonly %a, i1 %b) {
8 ; CHECK-LABEL: @test1
9
10 ; CHECK-DAG: load i64, i64* %1, align 4
11 ; CHECK-DAG: tail call void @llvm.assume(i1 %b)
12 ; CHECK: ret i64
13
14 %load1 = load i32, i32* %a, align 4
15 %conv = zext i32 %load1 to i64
16 %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
17 %load2 = load i32, i32* %arrayidx1, align 4
18 tail call void @llvm.assume(i1 %b)
19 %conv2 = zext i32 %load2 to i64
20 %shl = shl nuw i64 %conv2, 32
21 %add = or i64 %shl, %conv
22 ret i64 %add
23 }
24
25 ; 'call' before the 'load' doesn't get optimized:
26 define i64 @test2(i32* nocapture readonly %a, i1 %b) {
27 ; CHECK-LABEL: @test2
28
29 ; CHECK-DAG: load i64, i64* %1, align 4
30 ; CHECK-DAG: tail call void @llvm.assume(i1 %b)
31 ; CHECK: ret i64
32
33 %load1 = load i32, i32* %a, align 4
34 %conv = zext i32 %load1 to i64
35 %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
36 tail call void @llvm.assume(i1 %b)
37 %load2 = load i32, i32* %arrayidx1, align 4
38 %conv2 = zext i32 %load2 to i64
39 %shl = shl nuw i64 %conv2, 32
40 %add = or i64 %shl, %conv
41 ret i64 %add
42 }
43
+0 -19  test/Transforms/LoadCombine/load-combine-negativegep.ll
None ; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3
4 define i32 @Load_NegGep(i32* %i){
5 %1 = getelementptr inbounds i32, i32* %i, i64 -1
6 %2 = load i32, i32* %1, align 4
7 %3 = load i32, i32* %i, align 4
8 %4 = add nsw i32 %3, %2
9 ret i32 %4
10 ; CHECK-LABEL: @Load_NegGep(
11 ; CHECK: %[[load:.*]] = load i64
12 ; CHECK: %[[combine_extract_lo:.*]] = trunc i64 %[[load]] to i32
13 ; CHECK: %[[combine_extract_shift:.*]] = lshr i64 %[[load]], 32
14 ; CHECK: %[[combine_extract_hi:.*]] = trunc i64 %[[combine_extract_shift]] to i32
15 ; CHECK: %[[add:.*]] = add nsw i32 %[[combine_extract_hi]], %[[combine_extract_lo]]
16 }
17
18
+0 -190  test/Transforms/LoadCombine/load-combine.ll
None ; RUN: opt < %s -load-combine -instcombine -S | FileCheck %s
1
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3 target triple = "x86_64-unknown-linux-gnu"
4
5 ; Combine read from char* idiom.
6 define i64 @LoadU64_x64_0(i64* %pData) {
7 %1 = bitcast i64* %pData to i8*
8 %2 = load i8, i8* %1, align 1
9 %3 = zext i8 %2 to i64
10 %4 = shl nuw i64 %3, 56
11 %5 = getelementptr inbounds i8, i8* %1, i64 1
12 %6 = load i8, i8* %5, align 1
13 %7 = zext i8 %6 to i64
14 %8 = shl nuw nsw i64 %7, 48
15 %9 = or i64 %8, %4
16 %10 = getelementptr inbounds i8, i8* %1, i64 2
17 %11 = load i8, i8* %10, align 1
18 %12 = zext i8 %11 to i64
19 %13 = shl nuw nsw i64 %12, 40
20 %14 = or i64 %9, %13
21 %15 = getelementptr inbounds i8, i8* %1, i64 3
22 %16 = load i8, i8* %15, align 1
23 %17 = zext i8 %16 to i64
24 %18 = shl nuw nsw i64 %17, 32
25 %19 = or i64 %14, %18
26 %20 = getelementptr inbounds i8, i8* %1, i64 4
27 %21 = load i8, i8* %20, align 1
28 %22 = zext i8 %21 to i64
29 %23 = shl nuw nsw i64 %22, 24
30 %24 = or i64 %19, %23
31 %25 = getelementptr inbounds i8, i8* %1, i64 5
32 %26 = load i8, i8* %25, align 1
33 %27 = zext i8 %26 to i64
34 %28 = shl nuw nsw i64 %27, 16
35 %29 = or i64 %24, %28
36 %30 = getelementptr inbounds i8, i8* %1, i64 6
37 %31 = load i8, i8* %30, align 1
38 %32 = zext i8 %31 to i64
39 %33 = shl nuw nsw i64 %32, 8
40 %34 = or i64 %29, %33
41 %35 = getelementptr inbounds i8, i8* %1, i64 7
42 %36 = load i8, i8* %35, align 1
43 %37 = zext i8 %36 to i64
44 %38 = or i64 %34, %37
45 ret i64 %38
46 ; CHECK-LABEL: @LoadU64_x64_0(
47 ; CHECK: load i64, i64* %{{.*}}, align 1
48 ; CHECK-NOT: load
49 }
50
51 ; Combine simple adjacent loads.
52 define i32 @"2xi16_i32"(i16* %x) {
53 %1 = load i16, i16* %x, align 2
54 %2 = getelementptr inbounds i16, i16* %x, i64 1
55 %3 = load i16, i16* %2, align 2
56 %4 = zext i16 %3 to i32
57 %5 = shl nuw i32 %4, 16
58 %6 = zext i16 %1 to i32
59 %7 = or i32 %5, %6
60 ret i32 %7
61 ; CHECK-LABEL: @"2xi16_i32"(
62 ; CHECK: load i32, i32* %{{.*}}, align 2
63 ; CHECK-NOT: load
64 }
65
66 ; Don't combine loads across stores.
67 define i32 @"2xi16_i32_store"(i16* %x, i16* %y) {
68 %1 = load i16, i16* %x, align 2
69 store i16 0, i16* %y, align 2
70 %2 = getelementptr inbounds i16, i16* %x, i64 1
71 %3 = load i16, i16* %2, align 2
72 %4 = zext i16 %3 to i32
73 %5 = shl nuw i32 %4, 16
74 %6 = zext i16 %1 to i32
75 %7 = or i32 %5, %6
76 ret i32 %7
77 ; CHECK-LABEL: @"2xi16_i32_store"(
78 ; CHECK: load i16, i16* %{{.*}}, align 2
79 ; CHECK: store
80 ; CHECK: load i16, i16* %{{.*}}, align 2
81 }
82
83 ; Don't combine loads with a gap.
84 define i32 @"2xi16_i32_gap"(i16* %x) {
85 %1 = load i16, i16* %x, align 2
86 %2 = getelementptr inbounds i16, i16* %x, i64 2
87 %3 = load i16, i16* %2, align 2
88 %4 = zext i16 %3 to i32
89 %5 = shl nuw i32 %4, 16
90 %6 = zext i16 %1 to i32
91 %7 = or i32 %5, %6
92 ret i32 %7
93 ; CHECK-LABEL: @"2xi16_i32_gap"(
94 ; CHECK: load i16, i16* %{{.*}}, align 2
95 ; CHECK: load i16, i16* %{{.*}}, align 2
96 }
97
98 ; Combine out of order loads.
99 define i32 @"2xi16_i32_order"(i16* %x) {
100 %1 = getelementptr inbounds i16, i16* %x, i64 1
101 %2 = load i16, i16* %1, align 2
102 %3 = zext i16 %2 to i32
103 %4 = load i16, i16* %x, align 2
104 %5 = shl nuw i32 %3, 16
105 %6 = zext i16 %4 to i32
106 %7 = or i32 %5, %6
107 ret i32 %7
108 ; CHECK-LABEL: @"2xi16_i32_order"(
109 ; CHECK: load i32, i32* %{{.*}}, align 2
110 ; CHECK-NOT: load
111 }
112
113 ; Overlapping loads.
114 define i32 @"2xi16_i32_overlap"(i8* %x) {
115 %1 = bitcast i8* %x to i16*
116 %2 = load i16, i16* %1, align 2
117 %3 = getelementptr inbounds i8, i8* %x, i64 1
118 %4 = bitcast i8* %3 to i16*
119 %5 = load i16, i16* %4, align 2
120 %6 = zext i16 %5 to i32
121 %7 = shl nuw i32 %6, 16
122 %8 = zext i16 %2 to i32
123 %9 = or i32 %7, %8
124 ret i32 %9
125 ; CHECK-LABEL: @"2xi16_i32_overlap"(
126 ; CHECK: load i16, i16* %{{.*}}, align 2
127 ; CHECK: load i16, i16* %{{.*}}, align 2
128 }
129
130 ; Combine valid alignments.
131 define i64 @"2xi16_i64_align"(i8* %x) {
132 %1 = bitcast i8* %x to i32*
133 %2 = load i32, i32* %1, align 4
134 %3 = getelementptr inbounds i8, i8* %x, i64 4
135 %4 = bitcast i8* %3 to i16*
136 %5 = load i16, i16* %4, align 2
137 %6 = getelementptr inbounds i8, i8* %x, i64 6
138 %7 = bitcast i8* %6 to i16*
139 %8 = load i16, i16* %7, align 2
140 %9 = zext i16 %8 to i64
141 %10 = shl nuw i64 %9, 48
142 %11 = zext i16 %5 to i64
143 %12 = shl nuw nsw i64 %11, 32
144 %13 = zext i32 %2 to i64
145 %14 = or i64 %12, %13
146 %15 = or i64 %14, %10
147 ret i64 %15
148 ; CHECK-LABEL: @"2xi16_i64_align"(
149 ; CHECK: load i64, i64* %{{.*}}, align 4
150 }
151
152 ; Non power of two.
153 define i64 @"2xi16_i64_npo2"(i8* %x) {
154 %1 = load i8, i8* %x, align 1
155 %2 = zext i8 %1 to i64
156 %3 = getelementptr inbounds i8, i8* %x, i64 1
157 %4 = load i8, i8* %3, align 1
158 %5 = zext i8 %4 to i64
159 %6 = shl nuw nsw i64 %5, 8
160 %7 = or i64 %6, %2
161 %8 = getelementptr inbounds i8, i8* %x, i64 2
162 %9 = load i8, i8* %8, align 1
163 %10 = zext i8 %9 to i64
164 %11 = shl nuw nsw i64 %10, 16
165 %12 = or i64 %11, %7
166 %13 = getelementptr inbounds i8, i8* %x, i64 3
167 %14 = load i8, i8* %13, align 1
168 %15 = zext i8 %14 to i64
169 %16 = shl nuw nsw i64 %15, 24
170 %17 = or i64 %16, %12
171 %18 = getelementptr inbounds i8, i8* %x, i64 4
172 %19 = load i8, i8* %18, align 1
173 %20 = zext i8 %19 to i64
174 %21 = shl nuw nsw i64 %20, 32
175 %22 = or i64 %21, %17
176 %23 = getelementptr inbounds i8, i8* %x, i64 5
177 %24 = load i8, i8* %23, align 1
178 %25 = zext i8 %24 to i64
179 %26 = shl nuw nsw i64 %25, 40
180 %27 = or i64 %26, %22
181 %28 = getelementptr inbounds i8, i8* %x, i64 6
182 %29 = load i8, i8* %28, align 1
183 %30 = zext i8 %29 to i64
184 %31 = shl nuw nsw i64 %30, 48
185 %32 = or i64 %31, %27
186 ret i64 %32
187 ; CHECK-LABEL: @"2xi16_i64_npo2"(
188 ; CHECK: load i32, i32* %{{.*}}, align 1
189 }