llvm.org GIT mirror llvm / 2d8135f
Re-submitting changes in D51550 because it failed to patch. Reviewers: javed.absar, trentxintong, courbet Reviewed By: trentxintong Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D52433 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@342919 91177308-0d34-0410-b5e6-96231b3b80d8 Christy Lee 2 years ago
5 changed file(s) with 75 addition(s) and 38 deletion(s). Raw diff Collapse all Expand all
4040
4141 #define DEBUG_TYPE "mergeicmps"
4242
43 // Returns true if the instruction is a simple load or a simple store
44 static bool isSimpleLoadOrStore(const Instruction *I) {
45 if (const LoadInst *LI = dyn_cast(I))
46 return LI->isSimple();
47 if (const StoreInst *SI = dyn_cast(I))
48 return SI->isSimple();
49 return false;
50 }
51
4352 // A BCE atom.
4453 struct BCEAtom {
4554 BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
150159
151160 // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
152161 // instructions in the block.
153 bool canSplit() const;
162 bool canSplit(AliasAnalysis *AA) const;
154163
155164 // Return true if all the relevant instructions in the BCE-cmp-block can
156165 // be sunk below this instruction. By doing this, we know we can separate the
157166 // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
158167 // block.
159 bool canSinkBCECmpInst(const Instruction *, DenseSet &) const;
168 bool canSinkBCECmpInst(const Instruction *, DenseSet &,
169 AliasAnalysis *AA) const;
160170
161171 // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
162172 // instructions. Split the old block and move all non-BCE-cmp-insts into the
163173 // new parent block.
164 void split(BasicBlock *NewParent) const;
174 void split(BasicBlock *NewParent, AliasAnalysis *AA) const;
165175
166176 // The basic block where this comparison happens.
167177 BasicBlock *BB = nullptr;
179189 };
180190
181191 bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
182 DenseSet &BlockInsts) const {
192 DenseSet &BlockInsts,
193 AliasAnalysis *AA) const {
183194 // If this instruction has side effects and it is in the middle of the BCE cmp block
184195 // instructions, then bail for now.
185 // TODO: use alias analysis to tell whether there is real interference.
186 if (Inst->mayHaveSideEffects())
187 return false;
196 if (Inst->mayHaveSideEffects()) {
197 // Bail if this is not a simple load or store
198 if (!isSimpleLoadOrStore(Inst))
199 return false;
200 // Disallow stores that might alias the BCE operands
201 MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI);
202 MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI);
203 if (isModSet(AA->getModRefInfo(Inst, LLoc)) ||
204 isModSet(AA->getModRefInfo(Inst, RLoc)))
205 return false;
206 }
188207 // Make sure this instruction does not use any of the BCE cmp block
189208 // instructions as operand.
190209 for (auto BI : BlockInsts) {
194213 return true;
195214 }
196215
197 void BCECmpBlock::split(BasicBlock *NewParent) const {
216 void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
198217 DenseSet BlockInsts(
199218 {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
200219 llvm::SmallVector OtherInsts;
201220 for (Instruction &Inst : *BB) {
202221 if (BlockInsts.count(&Inst))
203222 continue;
204 assert(canSinkBCECmpInst(&Inst, BlockInsts) && "Split unsplittable block");
223 assert(canSinkBCECmpInst(&Inst, BlockInsts, AA) &&
224 "Split unsplittable block");
205225 // This is a non-BCE-cmp-block instruction. And it can be separated
206226 // from the BCE-cmp-block instruction.
207227 OtherInsts.push_back(&Inst);
213233 }
214234 }
215235
216 bool BCECmpBlock::canSplit() const {
236 bool BCECmpBlock::canSplit(AliasAnalysis *AA) const {
217237 DenseSet BlockInsts(
218238 {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
219239 for (Instruction &Inst : *BB) {
220240 if (!BlockInsts.count(&Inst)) {
221 if (!canSinkBCECmpInst(&Inst, BlockInsts))
241 if (!canSinkBCECmpInst(&Inst, BlockInsts, AA))
222242 return false;
223243 }
224244 }
324344 // A chain of comparisons.
325345 class BCECmpChain {
326346 public:
327 BCECmpChain(const std::vector &Blocks, PHINode &Phi);
347 BCECmpChain(const std::vector &Blocks, PHINode &Phi,
348 AliasAnalysis *AA);
328349
329350 int size() const { return Comparisons_.size(); }
330351
332353 void dump() const;
333354 #endif // MERGEICMPS_DOT_ON
334355
335 bool simplify(const TargetLibraryInfo *const TLI);
356 bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
336357
337358 private:
338359 static bool IsContiguous(const BCECmpBlock &First,
348369 // null, the merged block will link to the phi block.
349370 void mergeComparisons(ArrayRef Comparisons,
350371 BasicBlock *const NextBBInChain, PHINode &Phi,
351 const TargetLibraryInfo *const TLI);
372 const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
352373
353374 PHINode &Phi_;
354375 std::vector Comparisons_;
356377 BasicBlock *EntryBlock_;
357378 };
358379
359 BCECmpChain::BCECmpChain(const std::vector &Blocks, PHINode &Phi)
380 BCECmpChain::BCECmpChain(const std::vector &Blocks, PHINode &Phi,
381 AliasAnalysis *AA)
360382 : Phi_(Phi) {
361383 assert(!Blocks.empty() && "a chain should have at least one block");
362384 // Now look inside blocks to check for BCE comparisons.
388410 // and start anew.
389411 //
390412 // NOTE: we only handle blocks with a single predecessor for now.
391 if (Comparison.canSplit()) {
413 if (Comparison.canSplit(AA)) {
392414 LLVM_DEBUG(dbgs()
393415 << "Split initial block '" << Comparison.BB->getName()
394416 << "' that does extra work besides compare\n");
475497 }
476498 #endif // MERGEICMPS_DOT_ON
477499
478 bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
500 bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
501 AliasAnalysis *AA) {
479502 // First pass to check if there is at least one merge. If not, we don't do
480503 // anything and we keep analysis passes intact.
481504 {
523546 // Merge all previous comparisons and start a new merge block.
524547 mergeComparisons(
525548 makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
526 Comparisons_[I].BB, Phi_, TLI);
549 Comparisons_[I].BB, Phi_, TLI, AA);
527550 NumMerged = 1;
528551 }
529552 }
530553 mergeComparisons(makeArrayRef(Comparisons_)
531554 .slice(Comparisons_.size() - NumMerged, NumMerged),
532 nullptr, Phi_, TLI);
555 nullptr, Phi_, TLI, AA);
533556
534557 return true;
535558 }
537560 void BCECmpChain::mergeComparisons(ArrayRef Comparisons,
538561 BasicBlock *const NextBBInChain,
539562 PHINode &Phi,
540 const TargetLibraryInfo *const TLI) {
563 const TargetLibraryInfo *const TLI,
564 AliasAnalysis *AA) {
541565 assert(!Comparisons.empty());
542566 const auto &FirstComparison = *Comparisons.begin();
543567 BasicBlock *const BB = FirstComparison.BB;
550574 auto C = std::find_if(Comparisons.begin(), Comparisons.end(),
551575 [](const BCECmpBlock &B) { return B.RequireSplit; });
552576 if (C != Comparisons.end())
553 C->split(EntryBlock_);
577 C->split(EntryBlock_, AA);
554578
555579 LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
556580 const auto TotalSize =
666690 return Blocks;
667691 }
668692
669 bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) {
693 bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
694 AliasAnalysis *AA) {
670695 LLVM_DEBUG(dbgs() << "processPhi()\n");
671696 if (Phi.getNumIncomingValues() <= 1) {
672697 LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
724749 const auto Blocks =
725750 getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues());
726751 if (Blocks.empty()) return false;
727 BCECmpChain CmpChain(Blocks, Phi);
752 BCECmpChain CmpChain(Blocks, Phi, AA);
728753
729754 if (CmpChain.size() < 2) {
730755 LLVM_DEBUG(dbgs() << "skip: only one compare block\n");
731756 return false;
732757 }
733758
734 return CmpChain.simplify(TLI);
759 return CmpChain.simplify(TLI, AA);
735760 }
736761
737762 class MergeICmps : public FunctionPass {
746771 if (skipFunction(F)) return false;
747772 const auto &TLI = getAnalysis().getTLI();
748773 const auto &TTI = getAnalysis().getTTI(F);
749 auto PA = runImpl(F, &TLI, &TTI);
774 AliasAnalysis *AA = &getAnalysis().getAAResults();
775 auto PA = runImpl(F, &TLI, &TTI, AA);
750776 return !PA.areAllPreserved();
751777 }
752778
754780 void getAnalysisUsage(AnalysisUsage &AU) const override {
755781 AU.addRequired();
756782 AU.addRequired();
783 AU.addRequired();
757784 }
758785
759786 PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
760 const TargetTransformInfo *TTI);
787 const TargetTransformInfo *TTI, AliasAnalysis *AA);
761788 };
762789
763790 PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
764 const TargetTransformInfo *TTI) {
791 const TargetTransformInfo *TTI,
792 AliasAnalysis *AA) {
765793 LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
766794
767795 // We only try merging comparisons if the target wants to expand memcmp later.
777805 for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
778806 // A Phi operation is always first in a basic block.
779807 if (auto *const Phi = dyn_cast(&*BBIt->begin()))
780 MadeChange |= processPhi(*Phi, TLI);
808 MadeChange |= processPhi(*Phi, TLI, AA);
781809 }
782810
783811 if (MadeChange) return PreservedAnalyses::none();
791819 "Merge contiguous icmps into a memcmp", false, false)
792820 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
793821 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
822 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
794823 INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
795824 "Merge contiguous icmps into a memcmp", false, false)
796825
3131 ; CHECK-NEXT: Loop Pass Manager
3232 ; CHECK-NEXT: Induction Variable Users
3333 ; CHECK-NEXT: Loop Strength Reduction
34 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
35 ; CHECK-NEXT: Function Alias Analysis Results
3436 ; CHECK-NEXT: Merge contiguous icmps into a memcmp
3537 ; CHECK-NEXT: Expand memcmp() to load/stores
3638 ; CHECK-NEXT: Lower Garbage Collection Instructions
1212 ; STOP-BEFORE-NOT: Loop Strength Reduction
1313
1414 ; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-AFTER
15 ; START-AFTER: -machine-branch-prob -mergeicmps
15 ; START-AFTER: -aa -mergeicmps
1616 ; START-AFTER: FunctionPass Manager
17 ; START-AFTER-NEXT: Merge contiguous icmps into a memcmp
17 ; START-AFTER-NEXT: Dominator Tree Construction
1818
1919 ; RUN: llc < %s -debug-pass=Structure -start-before=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-BEFORE
2020 ; START-BEFORE: -machine-branch-prob -domtree
2121 ; START-BEFORE: FunctionPass Manager
2222 ; START-BEFORE: Loop Strength Reduction
23 ; START-BEFORE-NEXT: Merge contiguous icmps into a memcmp
23 ; START-BEFORE-NEXT: Basic Alias Analysis (stateless AA impl)
2424
2525 ; RUN: not llc < %s -start-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-START-BEFORE
2626 ; RUN: not llc < %s -stop-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-STOP-BEFORE
2525 ; CHECK-NEXT: Loop Pass Manager
2626 ; CHECK-NEXT: Induction Variable Users
2727 ; CHECK-NEXT: Loop Strength Reduction
28 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
29 ; CHECK-NEXT: Function Alias Analysis Results
2830 ; CHECK-NEXT: Merge contiguous icmps into a memcmp
2931 ; CHECK-NEXT: Expand memcmp() to load/stores
3032 ; CHECK-NEXT: Lower Garbage Collection Instructions
22
33 %"struct.std::pair" = type { i32, i32, i32, i32 }
44
5 ; Before patch D51550
6 define zeroext i1 @opeq1(
75 ; X86-LABEL: @opeq1(
86 ; X86-NEXT: entry:
97 ; X86-NEXT: [[PTR:%.*]] = alloca i32
8 ; X86-NEXT: store i32 42, i32* [[PTR]]
109 ; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
11 ; X86-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
1210 ; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
13 ; X86-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
14 ; X86-NEXT: store i32 42, i32* [[PTR]]
15 ; X86-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
16 ; X86-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
11 ; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
12 ; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
13 ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
14 ; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
15 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
16 ; X86: opeq1.exit:
17 ; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
18 ; X86-NEXT: ret i1 [[TMP1]]
1719
20 define zeroext i1 @opeq1(
1821
1922 %"struct.std::pair"* nocapture readonly dereferenceable(16) %a,
2023 %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
24
2125 entry:
2226 %ptr = alloca i32
2327 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0