llvm.org GIT mirror llvm / 7034870
[SimplifyCFG] don't sink common insts too soon (PR34603)

This should solve:
https://bugs.llvm.org/show_bug.cgi?id=34603
...by preventing SimplifyCFG from altering redundant instructions before
early-cse has a chance to run. It changes the default (canonical-forming)
behavior of SimplifyCFG, so we're only doing the sinking transform later in
the optimization pipeline.

Differential Revision: https://reviews.llvm.org/D38566

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320749 91177308-0d34-0410-b5e6-96231b3b80d8

Sanjay Patel 1 year, 9 months ago
15 changed file(s) with 56 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
3838 : SimplifyCFGPass(SimplifyCFGOptions()
3939 .forwardSwitchCondToPhi(false)
4040 .convertSwitchToLookupTable(false)
41 .needCanonicalLoops(true)) {}
41 .needCanonicalLoops(true)
42 .sinkCommonInsts(false)) {}
4243
4344
4445 /// Construct a pass with optional optimizations.
266266 //
267267 FunctionPass *createCFGSimplificationPass(
268268 unsigned Threshold = 1, bool ForwardSwitchCond = false,
269 bool ConvertSwitch = false, bool KeepLoops = true,
269 bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false,
270270 std::function<bool(const Function &)> Ftor = nullptr);
271271
272272 //===----------------------------------------------------------------------===//
6262 bool ForwardSwitchCondToPhi;
6363 bool ConvertSwitchToLookupTable;
6464 bool NeedCanonicalLoop;
65 bool SinkCommonInsts;
6566 AssumptionCache *AC;
6667
6768 SimplifyCFGOptions(unsigned BonusThreshold = 1,
6869 bool ForwardSwitchCond = false,
6970 bool SwitchToLookup = false, bool CanonicalLoops = true,
71 bool SinkCommon = false,
7072 AssumptionCache *AssumpCache = nullptr)
7173 : BonusInstThreshold(BonusThreshold),
7274 ForwardSwitchCondToPhi(ForwardSwitchCond),
7375 ConvertSwitchToLookupTable(SwitchToLookup),
74 NeedCanonicalLoop(CanonicalLoops), AC(AssumpCache) {}
76 NeedCanonicalLoop(CanonicalLoops),
77 SinkCommonInsts(SinkCommon),
78 AC(AssumpCache) {}
7579
7680 // Support 'builder' pattern to set members by name at construction time.
7781 SimplifyCFGOptions &bonusInstThreshold(int I) {
8892 }
8993 SimplifyCFGOptions &needCanonicalLoops(bool B) {
9094 NeedCanonicalLoop = B;
95 return *this;
96 }
97 SimplifyCFGOptions &sinkCommonInsts(bool B) {
98 SinkCommonInsts = B;
9199 return *this;
92100 }
93101 SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) {
746746 // Cleanup after the loop optimization passes.
747747 OptimizePM.addPass(InstCombinePass());
748748
749
750749 // Now that we've formed fast to execute loop structures, we do further
751750 // optimizations. These are run afterward as they might block doing complex
752751 // analyses and transforms such as what are needed for loop vectorization.
753752
753 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
754 // GVN, loop transforms, and others have already run, so it's now better to
755 // convert to more optimized IR using more aggressive simplify CFG options.
756 // The extra sinking transform can create larger basic blocks, so do this
757 // before SLP vectorization.
758 OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
759 forwardSwitchCondToPhi(true).
760 convertSwitchToLookupTable(true).
761 needCanonicalLoops(false).
762 sinkCommonInsts(true)));
763
754764 // Optimize parallel scalar instruction chains into SIMD instructions.
755765 OptimizePM.addPass(SLPVectorizerPass());
756766
757 // Cleanup after all of the vectorizers. Simplification passes like CVP and
758 // GVN, loop transforms, and others have already run, so it's now better to
759 // convert to more optimized IR using more aggressive simplify CFG options.
760 OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
761 forwardSwitchCondToPhi(true).
762 convertSwitchToLookupTable(true).
763 needCanonicalLoops(false)));
764767 OptimizePM.addPass(InstCombinePass());
765768
766769 // Unroll small loops to hide loop backedge latency and saturate any parallel
364364 // determine whether it succeeded. We can exploit existing control-flow in
365365 // ldrex/strex loops to simplify this, but it needs tidying up.
366366 if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
367 addPass(createCFGSimplificationPass(1, true, true, false));
367 addPass(createCFGSimplificationPass(1, true, true, false, true));
368368
369369 // Run LoopDataPrefetch
370370 //
384384 // ldrex/strex loops to simplify this, but it needs tidying up.
385385 if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
386386 addPass(createCFGSimplificationPass(
387 1, false, false, true, [this](const Function &F) {
387 1, false, false, true, true, [this](const Function &F) {
388388 const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
389389 return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
390390 }));
629629 addInstructionCombiningPass(MPM);
630630 }
631631
632 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
633 // GVN, loop transforms, and others have already run, so it's now better to
634 // convert to more optimized IR using more aggressive simplify CFG options.
635 // The extra sinking transform can create larger basic blocks, so do this
636 // before SLP vectorization.
637 MPM.add(createCFGSimplificationPass(1, true, true, false, true));
638
632639 if (RunSLPAfterLoopVectorization && SLPVectorize) {
633640 MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
634641 if (OptLevel > 1 && ExtraVectorizerPasses) {
637644 }
638645
639646 addExtensionsToPM(EP_Peephole, MPM);
640 // Switches to lookup tables and other transforms that may not be considered
641 // canonical by other IR passes.
642 MPM.add(createCFGSimplificationPass(1, true, true, false));
643647 addInstructionCombiningPass(MPM);
644648
645649 if (!DisableUnrollLoops) {
6060 "forward-switch-cond", cl::Hidden, cl::init(false),
6161 cl::desc("Forward switch condition to phi ops (default = false)"));
6262
63 static cl::opt<bool> UserSinkCommonInsts(
64 "sink-common-insts", cl::Hidden, cl::init(false),
65 cl::desc("Sink common instructions (default = false)"));
66
67
6368 STATISTIC(NumSimpl, "Number of blocks simplified");
6469
6570 /// If we have more than one empty (other than phi node) return blocks,
204209 Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences()
205210 ? UserKeepLoops
206211 : Opts.NeedCanonicalLoop;
212 Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
213 ? UserSinkCommonInsts
214 : Opts.SinkCommonInsts;
207215 }
208216
209217 PreservedAnalyses SimplifyCFGPass::run(Function &F,
225233
226234 CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false,
227235 bool ConvertSwitch = false, bool KeepLoops = true,
236 bool SinkCommon = false,
228237 std::function<bool(const Function &)> Ftor = nullptr)
229238 : FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
230239
245254
246255 Options.NeedCanonicalLoop =
247256 UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops;
257
258 Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
259 ? UserSinkCommonInsts
260 : SinkCommon;
248261 }
249262
250263 bool runOnFunction(Function &F) override {
275288 FunctionPass *
276289 llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond,
277290 bool ConvertSwitch, bool KeepLoops,
291 bool SinkCommon,
278292 std::function<bool(const Function &)> Ftor) {
279293 return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch,
280 KeepLoops, std::move(Ftor));
281 }
294 KeepLoops, SinkCommon, std::move(Ftor));
295 }
57275727 BasicBlock *BB = BI->getParent();
57285728 BasicBlock *Succ = BI->getSuccessor(0);
57295729
5730 if (SinkCommon && SinkThenElseCodeToEnd(BI))
5730 if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI))
57315731 return true;
57325732
57335733 // If the Terminator is the only non-phi instruction, simplify the block.
None ; RUN: opt -simplifycfg -S < %s | FileCheck %s
0 ; RUN: opt -simplifycfg -sink-common-insts -S < %s | FileCheck %s
11
22 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
33 target triple = "x86_64-unknown-linux-gnu"
196196 ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
197197 ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
198198 ; CHECK-O-NEXT: Running pass: InstCombinePass
199 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
199200 ; CHECK-O-NEXT: Running pass: SLPVectorizerPass
200 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
201201 ; CHECK-O-NEXT: Running pass: InstCombinePass
202202 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
203203 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
184184 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopLoadEliminationPass
185185 ; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis
186186 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
187 ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
187188 ; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
188 ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
189189 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
190190 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
191191 ; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7575 ; ALL-NEXT: [[XI:%.*]] = load double, double* [[XI_PTR]], align 8
7676 ; ALL-NEXT: [[YI:%.*]] = load double, double* [[YI_PTR]], align 8
7777 ; ALL-NEXT: [[CMP:%.*]] = fcmp ogt double [[XI]], [[YI]]
78 ; ALL-NEXT: [[Y_SINK:%.*]] = select i1 [[CMP]], double* [[X]], double* [[Y]]
79 ; ALL-NEXT: [[YI_PTR_AGAIN:%.*]] = getelementptr double, double* [[Y_SINK]], i64 [[I]]
80 ; ALL-NEXT: [[YI_AGAIN:%.*]] = load double, double* [[YI_PTR_AGAIN]], align 8
81 ; ALL-NEXT: ret double [[YI_AGAIN]]
78 ; ALL-NEXT: [[XI_YI:%.*]] = select i1 [[CMP]], double [[XI]], double [[YI]]
79 ; ALL-NEXT: ret double [[XI_YI]]
8280 ;
8381 entry:
8482 %xi_ptr = getelementptr double, double* %x, i64 %i
None ; RUN: opt < %s -simplifycfg -S | FileCheck %s
0 ; RUN: opt < %s -simplifycfg -sink-common-insts -S | FileCheck %s
11
22 define i1 @test1(i1 zeroext %flag, i8* %y) #0 {
33 entry:
None ; RUN: opt < %s -simplifycfg -S | FileCheck -enable-var-scope %s
0 ; RUN: opt < %s -simplifycfg -sink-common-insts -S | FileCheck -enable-var-scope %s
11
22 define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
33 entry: