llvm.org GIT mirror llvm / 193e898
[DivRempairs] add a pass to optimize div/rem pairs (PR31028) This is intended to be a superset of the functionality from D31037 (EarlyCSE) but implemented as an independent pass, so there's no stretching of scope and feature creep for an existing pass. I also proposed a weaker version of this for SimplifyCFG in D30910. And I initially had almost this same functionality as an addition to CGP in the motivating example of PR31028: https://bugs.llvm.org/show_bug.cgi?id=31028 The advantage of positioning this ahead of SimplifyCFG in the pass pipeline is that it can allow more flattening. But it needs to be after passes (InstCombine) that could sink a div/rem and undo the hoisting that is done here. Decomposing remainder may allow removing some code from the backend (PPC and possibly others). Differential Revision: https://reviews.llvm.org/D37121 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312862 91177308-0d34-0410-b5e6-96231b3b80d8 Sanjay Patel 2 years ago
17 changed file(s) with 647 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
482482 bool isLegalMaskedScatter(Type *DataType) const;
483483 bool isLegalMaskedGather(Type *DataType) const;
484484
485 /// Return true if the target has a unified operation to calculate division
486 /// and remainder. If so, the additional implicit multiplication and
487 /// subtraction required to calculate a remainder from division are free. This
488 /// can enable more aggressive transformations for division and remainder than
489 /// would typically be allowed using throughput or size cost models.
490 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
491
485492 /// Return true if target doesn't mind addresses in vectors.
486493 bool prefersVectorizedAddressing() const;
487494
959966 virtual bool isLegalMaskedLoad(Type *DataType) = 0;
960967 virtual bool isLegalMaskedScatter(Type *DataType) = 0;
961968 virtual bool isLegalMaskedGather(Type *DataType) = 0;
969 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
962970 virtual bool prefersVectorizedAddressing() = 0;
963971 virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
964972 int64_t BaseOffset, bool HasBaseReg,
11811189 bool isLegalMaskedGather(Type *DataType) override {
11821190 return Impl.isLegalMaskedGather(DataType);
11831191 }
1192 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1193 return Impl.hasDivRemOp(DataType, IsSigned);
1194 }
11841195 bool prefersVectorizedAddressing() override {
11851196 return Impl.prefersVectorizedAddressing();
11861197 }
249249 bool isLegalMaskedScatter(Type *DataType) { return false; }
250250
251251 bool isLegalMaskedGather(Type *DataType) { return false; }
252
253 bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
252254
253255 bool prefersVectorizedAddressing() { return true; }
254256
112112 void initializeDependenceAnalysisWrapperPassPass(PassRegistry&);
113113 void initializeDetectDeadLanesPass(PassRegistry&);
114114 void initializeDivergenceAnalysisPass(PassRegistry&);
115 void initializeDivRemPairsLegacyPassPass(PassRegistry&);
115116 void initializeDomOnlyPrinterPass(PassRegistry&);
116117 void initializeDomOnlyViewerPass(PassRegistry&);
117118 void initializeDomPrinterPass(PassRegistry&);
0 //===- DivRemPairs.h - Hoist/decompose integer division and remainder -----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass hoists and/or decomposes integer division and remainder
10 // instructions to enable CFG improvements and better codegen.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_TRANSFORMS_SCALAR_DIVREMPAIRS_H
15 #define LLVM_TRANSFORMS_SCALAR_DIVREMPAIRS_H
16
17 #include "llvm/IR/PassManager.h"
18
19 namespace llvm {
20
21 /// Hoist/decompose integer division and remainder instructions to enable CFG
22 /// improvements and better codegen.
23 struct DivRemPairsPass : public PassInfoMixin {
24 public:
25 PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
26 };
27
28 }
29 #endif // LLVM_TRANSFORMS_SCALAR_DIVREMPAIRS_H
30
376376
377377 //===----------------------------------------------------------------------===//
378378 //
379 // DivRemPairs - Hoist/decompose integer division and remainder instructions.
380 //
381 FunctionPass *createDivRemPairsPass();
382
383 //===----------------------------------------------------------------------===//
384 //
379385 // MemCpyOpt - This pass performs optimizations related to eliminating memcpy
380386 // calls and/or combining multiple stores into memset's.
381387 //
173173
174174 bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
175175 return TTIImpl->isLegalMaskedScatter(DataType);
176 }
177
178 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
179 return TTIImpl->hasDivRemOp(DataType, IsSigned);
176180 }
177181
178182 bool TargetTransformInfo::prefersVectorizedAddressing() const {
9191 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
9292 #include "llvm/Transforms/Scalar/DCE.h"
9393 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
94 #include "llvm/Transforms/Scalar/DivRemPairs.h"
9495 #include "llvm/Transforms/Scalar/EarlyCSE.h"
9596 #include "llvm/Transforms/Scalar/Float2Int.h"
9697 #include "llvm/Transforms/Scalar/GVN.h"
764765 // And finally clean up LCSSA form before generating code.
765766 OptimizePM.addPass(InstSimplifierPass());
766767
768 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
769 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
770 // flattening of blocks.
771 OptimizePM.addPass(DivRemPairsPass());
772
767773 // LoopSink (and other loop passes since the last simplifyCFG) might have
768774 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
769775 OptimizePM.addPass(SimplifyCFGPass());
141141 FUNCTION_PASS("consthoist", ConstantHoistingPass())
142142 FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
143143 FUNCTION_PASS("dce", DCEPass())
144 FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
144145 FUNCTION_PASS("dse", DSEPass())
145146 FUNCTION_PASS("dot-cfg", CFGPrinterPass())
146147 FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
25142514 return isLegalMaskedGather(DataType);
25152515 }
25162516
2517 bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
2518 EVT VT = TLI->getValueType(DL, DataType);
2519 return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT);
2520 }
2521
25172522 bool X86TTIImpl::areInlineCompatible(const Function *Caller,
25182523 const Function *Callee) const {
25192524 const TargetMachine &TM = getTLI()->getTargetMachine();
123123 bool isLegalMaskedStore(Type *DataType);
124124 bool isLegalMaskedGather(Type *DataType);
125125 bool isLegalMaskedScatter(Type *DataType);
126 bool hasDivRemOp(Type *DataType, bool IsSigned);
126127 bool areInlineCompatible(const Function *Caller,
127128 const Function *Callee) const;
128129 bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
672672 // Get rid of LCSSA nodes.
673673 MPM.add(createInstructionSimplifierPass());
674674
675 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
676 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
677 // flattening of blocks.
678 MPM.add(createDivRemPairsPass());
679
675680 // LoopSink (and other loop passes since the last simplifyCFG) might have
676681 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
677682 MPM.add(createCFGSimplificationPass());
66 CorrelatedValuePropagation.cpp
77 DCE.cpp
88 DeadStoreElimination.cpp
9 DivRemPairs.cpp
910 EarlyCSE.cpp
1011 FlattenCFGPass.cpp
1112 Float2Int.cpp
0 //===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass hoists and/or decomposes integer division and remainder
10 // instructions to enable CFG improvements and better codegen.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Transforms/Scalar/DivRemPairs.h"
15 #include "llvm/ADT/Statistic.h"
16 #include "llvm/Analysis/GlobalsModRef.h"
17 #include "llvm/Analysis/TargetTransformInfo.h"
18 #include "llvm/IR/Dominators.h"
19 #include "llvm/IR/Function.h"
20 #include "llvm/Pass.h"
21 #include "llvm/Transforms/Scalar.h"
22 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
23 using namespace llvm;
24
25 #define DEBUG_TYPE "div-rem-pairs"
26 STATISTIC(NumPairs, "Number of div/rem pairs");
27 STATISTIC(NumHoisted, "Number of instructions hoisted");
28 STATISTIC(NumDecomposed, "Number of instructions decomposed");
29
30 /// Find matching pairs of integer div/rem ops (they have the same numerator,
31 /// denominator, and signedness). If they exist in different basic blocks, bring
32 /// them together by hoisting or replace the common division operation that is
33 /// implicit in the remainder:
34 /// X % Y <--> X - ((X / Y) * Y).
35 ///
36 /// We can largely ignore the normal safety and cost constraints on speculation
37 /// of these ops when we find a matching pair. This is because we are already
38 /// guaranteed that any exceptions and most cost are already incurred by the
39 /// first member of the pair.
40 ///
41 /// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
42 /// SimplifyCFG, but it's split off on its own because it's different enough
43 /// that it doesn't quite match the stated objectives of those passes.
44 static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
45 const DominatorTree &DT) {
46 bool Changed = false;
47
48 // Insert all divide and remainder instructions into maps keyed by their
49 // operands and opcode (signed or unsigned).
50 DenseMap DivMap, RemMap;
51 for (auto &BB : F) {
52 for (auto &I : BB) {
53 if (I.getOpcode() == Instruction::SDiv)
54 DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
55 else if (I.getOpcode() == Instruction::UDiv)
56 DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
57 else if (I.getOpcode() == Instruction::SRem)
58 RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
59 else if (I.getOpcode() == Instruction::URem)
60 RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
61 }
62 }
63
64 // We can iterate over either map because we are only looking for matched
65 // pairs. Choose remainders for efficiency because they are usually even more
66 // rare than division.
67 for (auto &RemPair : RemMap) {
68 // Find the matching division instruction from the division map.
69 Instruction *DivInst = DivMap[RemPair.getFirst()];
70 if (!DivInst)
71 continue;
72
73 // We have a matching pair of div/rem instructions. If one dominates the
74 // other, hoist and/or replace one.
75 NumPairs++;
76 Instruction *RemInst = RemPair.getSecond();
77 bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
78 bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
79
80 // If the target supports div+rem and the instructions are in the same block
81 // already, there's nothing to do. The backend should handle this. If the
82 // target does not support div+rem, then we will decompose the rem.
83 if (HasDivRemOp && RemInst->getParent() == DivInst->getParent())
84 continue;
85
86 bool DivDominates = DT.dominates(DivInst, RemInst);
87 if (!DivDominates && !DT.dominates(RemInst, DivInst))
88 continue;
89
90 if (HasDivRemOp) {
91 // The target has a single div/rem operation. Hoist the lower instruction
92 // to make the matched pair visible to the backend.
93 if (DivDominates)
94 RemInst->moveAfter(DivInst);
95 else
96 DivInst->moveAfter(RemInst);
97 NumHoisted++;
98 } else {
99 // The target does not have a single div/rem operation. Decompose the
100 // remainder calculation as:
101 // X % Y --> X - ((X / Y) * Y).
102 Value *X = RemInst->getOperand(0);
103 Value *Y = RemInst->getOperand(1);
104 Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
105 Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
106
107 // If the remainder dominates, then hoist the division up to that block:
108 //
109 // bb1:
110 // %rem = srem %x, %y
111 // bb2:
112 // %div = sdiv %x, %y
113 // -->
114 // bb1:
115 // %div = sdiv %x, %y
116 // %mul = mul %div, %y
117 // %rem = sub %x, %mul
118 //
119 // If the division dominates, it's already in the right place. The mul+sub
120 // will be in a different block because we don't assume that they are
121 // cheap to speculatively execute:
122 //
123 // bb1:
124 // %div = sdiv %x, %y
125 // bb2:
126 // %rem = srem %x, %y
127 // -->
128 // bb1:
129 // %div = sdiv %x, %y
130 // bb2:
131 // %mul = mul %div, %y
132 // %rem = sub %x, %mul
133 //
134 // If the div and rem are in the same block, we do the same transform,
135 // but any code movement would be within the same block.
136
137 if (!DivDominates)
138 DivInst->moveBefore(RemInst);
139 Mul->insertAfter(RemInst);
140 Sub->insertAfter(Mul);
141
142 // Now kill the explicit remainder. We have replaced it with:
143 // (sub X, (mul (div X, Y), Y)
144 RemInst->replaceAllUsesWith(Sub);
145 RemInst->eraseFromParent();
146 NumDecomposed++;
147 }
148 Changed = true;
149 }
150
151 return Changed;
152 }
153
154 // Pass manager boilerplate below here.
155
156 namespace {
157 struct DivRemPairsLegacyPass : public FunctionPass {
158 static char ID;
159 DivRemPairsLegacyPass() : FunctionPass(ID) {
160 initializeDivRemPairsLegacyPassPass(*PassRegistry::getPassRegistry());
161 }
162
163 void getAnalysisUsage(AnalysisUsage &AU) const override {
164 AU.addRequired();
165 AU.addRequired();
166 AU.setPreservesCFG();
167 AU.addPreserved();
168 AU.addPreserved();
169 FunctionPass::getAnalysisUsage(AU);
170 }
171
172 bool runOnFunction(Function &F) override {
173 if (skipFunction(F))
174 return false;
175 auto &TTI = getAnalysis().getTTI(F);
176 auto &DT = getAnalysis().getDomTree();
177 return optimizeDivRem(F, TTI, DT);
178 }
179 };
180 }
181
182 char DivRemPairsLegacyPass::ID = 0;
183 INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
184 "Hoist/decompose integer division and remainder", false,
185 false)
186 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
187 INITIALIZE_PASS_END(DivRemPairsLegacyPass, "div-rem-pairs",
188 "Hoist/decompose integer division and remainder", false,
189 false)
190 FunctionPass *llvm::createDivRemPairsPass() {
191 return new DivRemPairsLegacyPass();
192 }
193
194 PreservedAnalyses DivRemPairsPass::run(Function &F,
195 FunctionAnalysisManager &FAM) {
196 TargetTransformInfo &TTI = FAM.getResult(F);
197 DominatorTree &DT = FAM.getResult(F);
198 if (!optimizeDivRem(F, TTI, DT))
199 return PreservedAnalyses::all();
200 // TODO: This pass just hoists/replaces math ops - all analyses are preserved?
201 PreservedAnalyses PA;
202 PA.preserveSet();
203 PA.preserve();
204 return PA;
205 }
3939 initializeCorrelatedValuePropagationPass(Registry);
4040 initializeDCELegacyPassPass(Registry);
4141 initializeDeadInstEliminationPass(Registry);
42 initializeDivRemPairsLegacyPassPass(Registry);
4243 initializeScalarizerPass(Registry);
4344 initializeDSELegacyPassPass(Registry);
4445 initializeGuardWideningLegacyPassPass(Registry);
204204 ; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
205205 ; CHECK-O-NEXT: Running pass: LoopSinkPass
206206 ; CHECK-O-NEXT: Running pass: InstSimplifierPass
207 ; CHECK-O-NEXT: Running pass: DivRemPairsPass
207208 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
208209 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
209210 ; CHECK-O-NEXT: Running pass: GlobalDCEPass
192192 ; CHECK-POSTLINK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
193193 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSinkPass
194194 ; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifierPass
195 ; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass
195196 ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
196197 ; CHECK-POSTLINK-O-NEXT: Finished llvm::Function pass manager run.
197198 ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass
0 ; RUN: opt < %s -div-rem-pairs -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=X86
1 ; RUN: opt < %s -div-rem-pairs -S -mtriple=powerpc64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=PPC
2
3 declare void @foo(i32, i32)
4
5 define void @decompose_illegal_srem_same_block(i32 %a, i32 %b) {
6 ; X86-LABEL: @decompose_illegal_srem_same_block(
7 ; X86-NEXT: [[REM:%.*]] = srem i32 %a, %b
8 ; X86-NEXT: [[DIV:%.*]] = sdiv i32 %a, %b
9 ; X86-NEXT: call void @foo(i32 [[REM]], i32 [[DIV]])
10 ; X86-NEXT: ret void
11 ;
12 ; PPC-LABEL: @decompose_illegal_srem_same_block(
13 ; PPC-NEXT: [[DIV:%.*]] = sdiv i32 %a, %b
14 ; PPC-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], %b
15 ; PPC-NEXT: [[TMP2:%.*]] = sub i32 %a, [[TMP1]]
16 ; PPC-NEXT: call void @foo(i32 [[TMP2]], i32 [[DIV]])
17 ; PPC-NEXT: ret void
18 ;
19 %rem = srem i32 %a, %b
20 %div = sdiv i32 %a, %b
21 call void @foo(i32 %rem, i32 %div)
22 ret void
23 }
24
25 define void @decompose_illegal_urem_same_block(i32 %a, i32 %b) {
26 ; X86-LABEL: @decompose_illegal_urem_same_block(
27 ; X86-NEXT: [[DIV:%.*]] = udiv i32 %a, %b
28 ; X86-NEXT: [[REM:%.*]] = urem i32 %a, %b
29 ; X86-NEXT: call void @foo(i32 [[REM]], i32 [[DIV]])
30 ; X86-NEXT: ret void
31 ;
32 ; PPC-LABEL: @decompose_illegal_urem_same_block(
33 ; PPC-NEXT: [[DIV:%.*]] = udiv i32 %a, %b
34 ; PPC-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], %b
35 ; PPC-NEXT: [[TMP2:%.*]] = sub i32 %a, [[TMP1]]
36 ; PPC-NEXT: call void @foo(i32 [[TMP2]], i32 [[DIV]])
37 ; PPC-NEXT: ret void
38 ;
39 %div = udiv i32 %a, %b
40 %rem = urem i32 %a, %b
41 call void @foo(i32 %rem, i32 %div)
42 ret void
43 }
44
45 ; Hoist and optionally decompose the sdiv because it's safe and free.
46 ; PR31028 - https://bugs.llvm.org/show_bug.cgi?id=31028
47
48 define i32 @hoist_sdiv(i32 %a, i32 %b) {
49 ; X86-LABEL: @hoist_sdiv(
50 ; X86-NEXT: entry:
51 ; X86-NEXT: [[REM:%.*]] = srem i32 %a, %b
52 ; X86-NEXT: [[DIV:%.*]] = sdiv i32 %a, %b
53 ; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 42
54 ; X86-NEXT: br i1 [[CMP]], label %if, label %end
55 ; X86: if:
56 ; X86-NEXT: br label %end
57 ; X86: end:
58 ; X86-NEXT: [[RET:%.*]] = phi i32 [ [[DIV]], %if ], [ 3, %entry ]
59 ; X86-NEXT: ret i32 [[RET]]
60 ;
61 ; PPC-LABEL: @hoist_sdiv(
62 ; PPC-NEXT: entry:
63 ; PPC-NEXT: [[DIV:%.*]] = sdiv i32 %a, %b
64 ; PPC-NEXT: [[TMP0:%.*]] = mul i32 [[DIV]], %b
65 ; PPC-NEXT: [[TMP1:%.*]] = sub i32 %a, [[TMP0]]
66 ; PPC-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 42
67 ; PPC-NEXT: br i1 [[CMP]], label %if, label %end
68 ; PPC: if:
69 ; PPC-NEXT: br label %end
70 ; PPC: end:
71 ; PPC-NEXT: [[RET:%.*]] = phi i32 [ [[DIV]], %if ], [ 3, %entry ]
72 ; PPC-NEXT: ret i32 [[RET]]
73 ;
74 entry:
75 %rem = srem i32 %a, %b
76 %cmp = icmp eq i32 %rem, 42
77 br i1 %cmp, label %if, label %end
78
79 if:
80 %div = sdiv i32 %a, %b
81 br label %end
82
83 end:
84 %ret = phi i32 [ %div, %if ], [ 3, %entry ]
85 ret i32 %ret
86 }
87
88 ; Hoist and optionally decompose the udiv because it's safe and free.
89
90 define i64 @hoist_udiv(i64 %a, i64 %b) {
91 ; X86-LABEL: @hoist_udiv(
92 ; X86-NEXT: entry:
93 ; X86-NEXT: [[REM:%.*]] = urem i64 %a, %b
94 ; X86-NEXT: [[DIV:%.*]] = udiv i64 %a, %b
95 ; X86-NEXT: [[CMP:%.*]] = icmp eq i64 [[REM]], 42
96 ; X86-NEXT: br i1 [[CMP]], label %if, label %end
97 ; X86: if:
98 ; X86-NEXT: br label %end
99 ; X86: end:
100 ; X86-NEXT: [[RET:%.*]] = phi i64 [ [[DIV]], %if ], [ 3, %entry ]
101 ; X86-NEXT: ret i64 [[RET]]
102 ;
103 ; PPC-LABEL: @hoist_udiv(
104 ; PPC-NEXT: entry:
105 ; PPC-NEXT: [[DIV:%.*]] = udiv i64 %a, %b
106 ; PPC-NEXT: [[TMP0:%.*]] = mul i64 [[DIV]], %b
107 ; PPC-NEXT: [[TMP1:%.*]] = sub i64 %a, [[TMP0]]
108 ; PPC-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], 42
109 ; PPC-NEXT: br i1 [[CMP]], label %if, label %end
110 ; PPC: if:
111 ; PPC-NEXT: br label %end
112 ; PPC: end:
113 ; PPC-NEXT: [[RET:%.*]] = phi i64 [ [[DIV]], %if ], [ 3, %entry ]
114 ; PPC-NEXT: ret i64 [[RET]]
115 ;
116 entry:
117 %rem = urem i64 %a, %b
118 %cmp = icmp eq i64 %rem, 42
119 br i1 %cmp, label %if, label %end
120
121 if:
122 %div = udiv i64 %a, %b
123 br label %end
124
125 end:
126 %ret = phi i64 [ %div, %if ], [ 3, %entry ]
127 ret i64 %ret
128 }
129
130 ; Hoist the srem if it's safe and free, otherwise decompose it.
131
132 define i16 @hoist_srem(i16 %a, i16 %b) {
133 ; X86-LABEL: @hoist_srem(
134 ; X86-NEXT: entry:
135 ; X86-NEXT: [[DIV:%.*]] = sdiv i16 %a, %b
136 ; X86-NEXT: [[REM:%.*]] = srem i16 %a, %b
137 ; X86-NEXT: [[CMP:%.*]] = icmp eq i16 [[DIV]], 42
138 ; X86-NEXT: br i1 [[CMP]], label %if, label %end
139 ; X86: if:
140 ; X86-NEXT: br label %end
141 ; X86: end:
142 ; X86-NEXT: [[RET:%.*]] = phi i16 [ [[REM]], %if ], [ 3, %entry ]
143 ; X86-NEXT: ret i16 [[RET]]
144 ;
145 ; PPC-LABEL: @hoist_srem(
146 ; PPC-NEXT: entry:
147 ; PPC-NEXT: [[DIV:%.*]] = sdiv i16 %a, %b
148 ; PPC-NEXT: [[CMP:%.*]] = icmp eq i16 [[DIV]], 42
149 ; PPC-NEXT: br i1 [[CMP]], label %if, label %end
150 ; PPC: if:
151 ; PPC-NEXT: [[TMP0:%.*]] = mul i16 [[DIV]], %b
152 ; PPC-NEXT: [[TMP1:%.*]] = sub i16 %a, [[TMP0]]
153 ; PPC-NEXT: br label %end
154 ; PPC: end:
155 ; PPC-NEXT: [[RET:%.*]] = phi i16 [ [[TMP1]], %if ], [ 3, %entry ]
156 ; PPC-NEXT: ret i16 [[RET]]
157 ;
158 entry:
159 %div = sdiv i16 %a, %b
160 %cmp = icmp eq i16 %div, 42
161 br i1 %cmp, label %if, label %end
162
163 if:
164 %rem = srem i16 %a, %b
165 br label %end
166
167 end:
168 %ret = phi i16 [ %rem, %if ], [ 3, %entry ]
169 ret i16 %ret
170 }
171
172 ; Hoist the urem if it's safe and free, otherwise decompose it.
173
174 define i8 @hoist_urem(i8 %a, i8 %b) {
175 ; X86-LABEL: @hoist_urem(
176 ; X86-NEXT: entry:
177 ; X86-NEXT: [[DIV:%.*]] = udiv i8 %a, %b
178 ; X86-NEXT: [[REM:%.*]] = urem i8 %a, %b
179 ; X86-NEXT: [[CMP:%.*]] = icmp eq i8 [[DIV]], 42
180 ; X86-NEXT: br i1 [[CMP]], label %if, label %end
181 ; X86: if:
182 ; X86-NEXT: br label %end
183 ; X86: end:
184 ; X86-NEXT: [[RET:%.*]] = phi i8 [ [[REM]], %if ], [ 3, %entry ]
185 ; X86-NEXT: ret i8 [[RET]]
186 ;
187 ; PPC-LABEL: @hoist_urem(
188 ; PPC-NEXT: entry:
189 ; PPC-NEXT: [[DIV:%.*]] = udiv i8 %a, %b
190 ; PPC-NEXT: [[CMP:%.*]] = icmp eq i8 [[DIV]], 42
191 ; PPC-NEXT: br i1 [[CMP]], label %if, label %end
192 ; PPC: if:
193 ; PPC-NEXT: [[TMP0:%.*]] = mul i8 [[DIV]], %b
194 ; PPC-NEXT: [[TMP1:%.*]] = sub i8 %a, [[TMP0]]
195 ; PPC-NEXT: br label %end
196 ; PPC: end:
197 ; PPC-NEXT: [[RET:%.*]] = phi i8 [ [[TMP1]], %if ], [ 3, %entry ]
198 ; PPC-NEXT: ret i8 [[RET]]
199 ;
200 entry:
201 %div = udiv i8 %a, %b
202 %cmp = icmp eq i8 %div, 42
203 br i1 %cmp, label %if, label %end
204
205 if:
206 %rem = urem i8 %a, %b
207 br label %end
208
209 end:
210 %ret = phi i8 [ %rem, %if ], [ 3, %entry ]
211 ret i8 %ret
212 }
213
214 ; If the ops don't match, don't do anything: signedness.
215
216 define i32 @dont_hoist_udiv(i32 %a, i32 %b) {
217 ; ALL-LABEL: @dont_hoist_udiv(
218 ; ALL-NEXT: entry:
219 ; ALL-NEXT: [[REM:%.*]] = srem i32 %a, %b
220 ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 42
221 ; ALL-NEXT: br i1 [[CMP]], label %if, label %end
222 ; ALL: if:
223 ; ALL-NEXT: [[DIV:%.*]] = udiv i32 %a, %b
224 ; ALL-NEXT: br label %end
225 ; ALL: end:
226 ; ALL-NEXT: [[RET:%.*]] = phi i32 [ [[DIV]], %if ], [ 3, %entry ]
227 ; ALL-NEXT: ret i32 [[RET]]
228 ;
229 entry:
230 %rem = srem i32 %a, %b
231 %cmp = icmp eq i32 %rem, 42
232 br i1 %cmp, label %if, label %end
233
234 if:
235 %div = udiv i32 %a, %b
236 br label %end
237
238 end:
239 %ret = phi i32 [ %div, %if ], [ 3, %entry ]
240 ret i32 %ret
241 }
242
243 ; If the ops don't match, don't do anything: operation.
244
245 define i32 @dont_hoist_srem(i32 %a, i32 %b) {
246 ; ALL-LABEL: @dont_hoist_srem(
247 ; ALL-NEXT: entry:
248 ; ALL-NEXT: [[REM:%.*]] = urem i32 %a, %b
249 ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 42
250 ; ALL-NEXT: br i1 [[CMP]], label %if, label %end
251 ; ALL: if:
252 ; ALL-NEXT: [[REM2:%.*]] = srem i32 %a, %b
253 ; ALL-NEXT: br label %end
254 ; ALL: end:
255 ; ALL-NEXT: [[RET:%.*]] = phi i32 [ [[REM2]], %if ], [ 3, %entry ]
256 ; ALL-NEXT: ret i32 [[RET]]
257 ;
258 entry:
259 %rem = urem i32 %a, %b
260 %cmp = icmp eq i32 %rem, 42
261 br i1 %cmp, label %if, label %end
262
263 if:
264 %rem2 = srem i32 %a, %b
265 br label %end
266
267 end:
268 %ret = phi i32 [ %rem2, %if ], [ 3, %entry ]
269 ret i32 %ret
270 }
271
272 ; If the ops don't match, don't do anything: operands.
273
274 define i32 @dont_hoist_sdiv(i32 %a, i32 %b, i32 %c) {
275 ; ALL-LABEL: @dont_hoist_sdiv(
276 ; ALL-NEXT: entry:
277 ; ALL-NEXT: [[REM:%.*]] = srem i32 %a, %b
278 ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 42
279 ; ALL-NEXT: br i1 [[CMP]], label %if, label %end
280 ; ALL: if:
281 ; ALL-NEXT: [[DIV:%.*]] = sdiv i32 %a, %c
282 ; ALL-NEXT: br label %end
283 ; ALL: end:
284 ; ALL-NEXT: [[RET:%.*]] = phi i32 [ [[DIV]], %if ], [ 3, %entry ]
285 ; ALL-NEXT: ret i32 [[RET]]
286 ;
287 entry:
288 %rem = srem i32 %a, %b
289 %cmp = icmp eq i32 %rem, 42
290 br i1 %cmp, label %if, label %end
291
292 if:
293 %div = sdiv i32 %a, %c
294 br label %end
295
296 end:
297 %ret = phi i32 [ %div, %if ], [ 3, %entry ]
298 ret i32 %ret
299 }
300
301 ; If the target doesn't have a unified div/rem op for the type, decompose rem in-place to mul+sub.
302
303 define i128 @dont_hoist_urem(i128 %a, i128 %b) {
304 ; ALL-LABEL: @dont_hoist_urem(
305 ; ALL-NEXT: entry:
306 ; ALL-NEXT: [[DIV:%.*]] = udiv i128 %a, %b
307 ; ALL-NEXT: [[CMP:%.*]] = icmp eq i128 [[DIV]], 42
308 ; ALL-NEXT: br i1 [[CMP]], label %if, label %end
309 ; ALL: if:
310 ; ALL-NEXT: [[TMP0:%.*]] = mul i128 [[DIV]], %b
311 ; ALL-NEXT: [[TMP1:%.*]] = sub i128 %a, [[TMP0]]
312 ; ALL-NEXT: br label %end
313 ; ALL: end:
314 ; ALL-NEXT: [[RET:%.*]] = phi i128 [ [[TMP1]], %if ], [ 3, %entry ]
315 ; ALL-NEXT: ret i128 [[RET]]
316 ;
317 entry:
318 %div = udiv i128 %a, %b
319 %cmp = icmp eq i128 %div, 42
320 br i1 %cmp, label %if, label %end
321
322 if:
323 %rem = urem i128 %a, %b
324 br label %end
325
326 end:
327 %ret = phi i128 [ %rem, %if ], [ 3, %entry ]
328 ret i128 %ret
329 }
330
331 ; We don't hoist if one op does not dominate the other,
332 ; but we could hoist both ops to the common predecessor block?
333
334 define i32 @no_domination(i1 %cmp, i32 %a, i32 %b) {
335 ; ALL-LABEL: @no_domination(
336 ; ALL-NEXT: entry:
337 ; ALL-NEXT: br i1 %cmp, label %if, label %else
338 ; ALL: if:
339 ; ALL-NEXT: [[DIV:%.*]] = sdiv i32 %a, %b
340 ; ALL-NEXT: br label %end
341 ; ALL: else:
342 ; ALL-NEXT: [[REM:%.*]] = srem i32 %a, %b
343 ; ALL-NEXT: br label %end
344 ; ALL: end:
345 ; ALL-NEXT: [[RET:%.*]] = phi i32 [ [[DIV]], %if ], [ [[REM]], %else ]
346 ; ALL-NEXT: ret i32 [[RET]]
347 ;
348 entry:
349 br i1 %cmp, label %if, label %else
350
351 if:
352 %div = sdiv i32 %a, %b
353 br label %end
354
355 else:
356 %rem = srem i32 %a, %b
357 br label %end
358
359 end:
360 %ret = phi i32 [ %div, %if ], [ %rem, %else ]
361 ret i32 %ret
362 }
363