llvm.org GIT mirror llvm / c038a78
Now that Reassociate's LinearizeExprTree can look through arbitrary expression topologies, it is quite possible for a leaf node to have huge multiplicity. For example, x0 = x*x, x1 = x0*x0, x2 = x1*x1, ... rapidly gives a value which is x raised to a vast power (the multiplicity, or weight, of x). This patch fixes the computation of weights by correctly computing them no matter how big they are, rather than just overflowing and getting a wrong value. It turns out that the weight for a value never needs more bits to represent than the value itself, so it is enough to represent weights as APInts of the same bitwidth and do the right overflow-avoiding dance steps when computing weights. As a side effect, it reduces the number of multiplies needed in some cases of large powers. While there, in view of external uses (e.g. by the vectorizer), I made LinearizeExprTree static, pushing the rank computation out into users. This is progress towards fixing PR13021. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158358 91177308-0d34-0410-b5e6-96231b3b80d8 Duncan Sands 7 years ago
6 changed file(s) with 526 addition(s) and 26 deletion(s). Raw diff Collapse all Expand all
916916 return getLShr(C1, C2, true);
917917 }
918918
919 /// getBinOpIdentity - Return the identity for the given binary operation,
920 /// i.e. a constant C such that X op C = X and C op X = X for every X. It
921 /// is an error to call this for an operation that doesn't have an identity.
922 static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty);
923
919924 /// Transparently provide more efficient getOperand methods.
920925 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
921926
213213 ///
214214 bool isCommutative() const { return isCommutative(getOpcode()); }
215215 static bool isCommutative(unsigned op);
216
217 /// isIdempotent - Return true if the instruction is idempotent:
218 ///
219 /// Idempotent operators satisfy: x op x === x
220 ///
221 /// In LLVM, the And and Or operators are idempotent.
222 ///
223 bool isIdempotent() const { return isIdempotent(getOpcode()); }
224 static bool isIdempotent(unsigned op);
225
226 /// isNilpotent - Return true if the instruction is nilpotent:
227 ///
228 /// Nilpotent operators satisfy: x op x === Id,
229 ///
230 /// where Id is the identity for the operator, i.e. a constant such that
231 /// x op Id === x and Id op x === x for all x.
232 ///
233 /// In LLVM, the Xor operator is nilpotent.
234 ///
235 bool isNilpotent() const { return isNilpotent(getOpcode()); }
236 static bool isNilpotent(unsigned op);
216237
217238 /// mayWriteToMemory - Return true if this instruction may modify memory.
218239 ///
142142 Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
143143 SmallVectorImpl &Factors);
144144 Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl &Ops);
145 void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl &Ops);
146145 Value *RemoveFactorFromExpression(Value *V, Value *Factor);
147146 void EraseInst(Instruction *I);
148147 void OptimizeInst(Instruction *I);
250249 return Res;
251250 }
252251
252 /// CarmichaelShift - Returns k such that lambda(2^Bitwidth) = 2^k, where lambda
253 /// is the Carmichael function. This means that x^(2^k) === 1 mod 2^Bitwidth for
254 /// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
255 /// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
256 /// even x in Bitwidth-bit arithmetic. Bitwidth must be at least 1.
257 static unsigned CarmichaelShift(unsigned Bitwidth) {
258 if (Bitwidth < 3)
259 return Bitwidth - 1; // lambda(2) = 1 = 2^0 and lambda(4) = 2 = 2^1.
260 return Bitwidth - 2; // lambda(2^n) = 2^(n-2) for n >= 3.
261 }
262
263 /// IncorporateWeight - Add the extra weight 'RHS' to the existing weight 'LHS',
264 /// reducing the combined weight using any special properties of the operation.
265 /// The existing weight LHS represents the computation X op X op ... op X where
266 /// X occurs LHS times. The combined weight represents X op X op ... op X with
267 /// X occurring LHS + RHS times. If op is "Xor" for example then the combined
268 /// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
269 /// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
270 static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
271 // If we were working with infinite precision arithmetic then the combined
272 // weight would be LHS + RHS. But we are using finite precision arithmetic,
273 // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
274 // for nilpotent operations and addition, but not for idempotent operations
275 // and multiplication), so it is important to correctly reduce the combined
276 // weight back into range if wrapping would be wrong.
277
278 // If RHS is zero then the weight didn't change.
279 if (RHS.isMinValue())
280 return;
281 // If LHS is zero then the combined weight is RHS.
282 if (LHS.isMinValue()) {
283 LHS = RHS;
284 return;
285 }
286 // From this point on we know that neither LHS nor RHS is zero.
287
288 if (Instruction::isIdempotent(Opcode)) {
289 // Idempotent means X op X === X, so any non-zero weight is equivalent to a
290 // weight of 1. Keeping weights at zero or one also means that wrapping is
291 // not a problem.
292 assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
293 return; // Return a weight of 1.
294 }
295 if (Instruction::isNilpotent(Opcode)) {
296 // Nilpotent means X op X === 0, so reduce weights modulo 2.
297 assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
298 LHS = 0; // 1 + 1 === 0 modulo 2.
299 return;
300 }
301 if (Opcode == Instruction::Add) {
302 // TODO: Reduce the weight by exploiting nsw/nuw?
303 LHS += RHS;
304 return;
305 }
306
307 assert(Opcode == Instruction::Mul && "Unknown associative operation!");
308 unsigned Bitwidth = LHS.getBitWidth();
309 // If CM = lambda(2^Bitwidth) (Carmichael's function) then any weight W >= CM+Bitwidth
310 // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
311 // bit number x, since either x is odd in which case x^CM = 1, or x is even in
312 // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
313 // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
314 // which by a happy accident means that they can always be represented using
315 // Bitwidth bits.
316 // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
317 // the Carmichael function value).
318 if (Bitwidth > 3) {
319 /// CM - The value of Carmichael's lambda function.
320 APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
321 // Any weight W >= Threshold can be replaced with W - CM.
322 APInt Threshold = CM + Bitwidth;
323 assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
324 // For Bitwidth 4 or more the following sum does not overflow.
325 LHS += RHS; // Both are < Threshold <= 2^(Bitwidth-1), so the sum fits.
326 while (LHS.uge(Threshold))
327 LHS -= CM;
328 } else {
329 // To avoid problems with overflow do everything the same as above but using
330 // a larger type.
331 unsigned CM = 1U << CarmichaelShift(Bitwidth);
332 unsigned Threshold = CM + Bitwidth;
333 assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
334 "Weights not reduced!");
335 unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
336 while (Total >= Threshold)
337 Total -= CM;
338 LHS = Total;
339 }
340 }
341
342 /// EvaluateRepeatedConstant - Compute C op C op ... op C where the constant C
343 /// is repeated Weight times. Weight must be non-zero unless Opcode is Add.
344 static Constant *EvaluateRepeatedConstant(unsigned Opcode, Constant *C,
345 APInt Weight) {
346 // For addition the result can be efficiently computed as the product of the
347 // constant and the weight.
348 if (Opcode == Instruction::Add)
349 return ConstantExpr::getMul(C, ConstantInt::get(C->getContext(), Weight));
350
351 // The weight might be huge, so compute by repeated squaring to ensure that
352 // compile time is proportional to the logarithm of the weight.
353 Constant *Result = 0;
354 Constant *Power = C; // Successively C, C op C, (C op C) op (C op C) etc.
355 // Visit the bits in Weight.
356 while (Weight != 0) {
357 // If the current bit in Weight is non-zero do Result = Result op Power.
358 if (Weight[0])
359 Result = Result ? ConstantExpr::get(Opcode, Result, Power) : Power;
360 // Move on to the next bit if any more are non-zero.
361 Weight = Weight.lshr(1);
362 if (Weight.isMinValue())
363 break; // No bits left, so skip computing a square that would go unused.
364 // Square the power.
365 Power = ConstantExpr::get(Opcode, Power, Power);
366 }
367
368 assert(Result && "Only positive weights supported!");
369 return Result;
370 }
371
372 typedef std::pair RepeatedValue; // (value, weight) pair. NOTE(review): template arguments were lost in extraction; presumably <Value*, APInt> - confirm against the original file.
373
253374 /// LinearizeExprTree - Given an associative binary expression, return the leaf
254 /// nodes in Ops. The original expression is the same as Ops[0] op ... Ops[N].
255 /// Note that a node may occur multiple times in Ops, but if so all occurrences
256 /// are consecutive in the vector.
375 /// nodes in Ops along with their weights (how many times the leaf occurs). The
376 /// original expression is the same as
377 /// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
378 /// op
379 /// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
380 /// op
381 /// ...
382 /// op
383 /// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
384 ///
385 /// Note that the values Ops[0].first, ..., Ops[N].first are all distinct, and
386 /// they are all non-constant except possibly for the last one, which if it is
387 /// constant will have weight one (Ops[N].second === 1).
388 ///
389 /// This routine may modify the function, in which case it returns 'true'. The
390 /// changes it makes may well be destructive, changing the value computed by 'I'
391 /// to something completely different. Thus if the routine returns 'true' then
392 /// you MUST either replace I with a new expression computed from the Ops array,
393 /// or use RewriteExprTree to put the values back in.
257394 ///
258395 /// A leaf node is either not a binary operation of the same kind as the root
259396 /// node 'I' (i.e. is not a binary operator at all, or is, but with a different
275412 /// + * | F, G
276413 ///
277414 /// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
278 /// that order) C, E, F, F, G, G.
415 /// that order) (C, 1), (E, 1), (F, 2), (G, 2).
279416 ///
280417 /// The expression is maximal: if some instruction is a binary operator of the
281418 /// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
286423 /// order to ensure that every non-root node in the expression has *exactly one*
287424 /// use by a non-leaf node of the expression. This destruction means that the
288425 /// caller MUST either replace 'I' with a new expression or use something like
289 /// RewriteExprTree to put the values back in.
426 /// RewriteExprTree to put the values back in if the routine indicates that it
427 /// made a change by returning 'true'.
290428 ///
291429 /// In the above example either the right operand of A or the left operand of B
292430 /// will be replaced by undef. If it is B's operand then this gives:
309447 /// of the expression) if it can turn them into binary operators of the right
310448 /// type and thus make the expression bigger.
311449
312 void Reassociate::LinearizeExprTree(BinaryOperator *I,
313 SmallVectorImpl &Ops) {
450 static bool LinearizeExprTree(BinaryOperator *I,
451 SmallVectorImpl &Ops) {
314452 DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
453 unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
454 unsigned Opcode = I->getOpcode();
455 assert(Instruction::isAssociative(Opcode) &&
456 Instruction::isCommutative(Opcode) &&
457 "Expected an associative and commutative operation!");
315458
316459 // Visit all operands of the expression, keeping track of their weight (the
317460 // number of paths from the expression root to the operand, or if you like
323466 // with their weights, representing a certain number of paths to the operator.
324467 // If an operator occurs in the worklist multiple times then we found multiple
325468 // ways to get to it.
326 SmallVector, 8> Worklist; // (Op, Weight)
327 Worklist.push_back(std::make_pair(I, 1));
328 unsigned Opcode = I->getOpcode();
469 SmallVector, 8> Worklist; // (Op, Weight)
470 Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
471 bool MadeChange = false;
329472
330473 // Leaves of the expression are values that either aren't the right kind of
331474 // operation (eg: a constant, or a multiply in an add tree), or are, but have
342485
343486 // Leaves - Keeps track of the set of putative leaves as well as the number of
344487 // paths to each leaf seen so far.
345 typedef SmallMapunsigned, 8> LeafMap;
488 typedef SmallMapAPInt, 8> LeafMap;
346489 LeafMap Leaves; // Leaf -> Total weight so far.
347490 SmallVector LeafOrder; // Ensure deterministic leaf output order.
348491
350493 SmallPtrSet Visited; // For sanity checking the iteration scheme.
351494 #endif
352495 while (!Worklist.empty()) {
353 std::pairunsigned> P = Worklist.pop_back_val();
496 std::pairAPInt> P = Worklist.pop_back_val();
354497 I = P.first; // We examine the operands of this binary operator.
355 assert(P.second >= 1 && "No paths to here, so how did we get here?!");
356498
357499 for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands.
358500 Value *Op = I->getOperand(OpIdx);
359 unsigned Weight = P.second; // Number of paths to this operand.
501 APInt Weight = P.second; // Number of paths to this operand.
360502 DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
361503 assert(!Op->use_empty() && "No uses, so how did we get to it?!");
362504
388530 assert(Visited.count(Op) && "In leaf map but not visited!");
389531
390532 // Update the number of paths to the leaf.
391 It->second += Weight;
533 IncorporateWeight(It->second, Weight, Opcode);
392534
393535 // The leaf already has one use from inside the expression. As we want
394536 // exactly one such use, drop this new use of the leaf.
449591
450592 // The leaves, repeated according to their weights, represent the linearized
451593 // form of the expression.
594 Constant *Cst = 0; // Accumulate constants here.
452595 for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
453596 Value *V = LeafOrder[i];
454597 LeafMap::iterator It = Leaves.find(V);
455598 if (It == Leaves.end())
456 // Leaf already output, or node initially thought to be a leaf wasn't.
599 // Node initially thought to be a leaf wasn't.
457600 continue;
458601 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
459 unsigned Weight = It->second;
460 assert(Weight > 0 && "No paths to this value!");
461 // FIXME: Rather than repeating values Weight times, use a vector of
462 // (ValueEntry, multiplicity) pairs.
463 Ops.append(Weight, ValueEntry(getRank(V), V));
602 APInt Weight = It->second;
603 if (Weight.isMinValue())
604 // Leaf already output or weight reduction eliminated it.
605 continue;
464606 // Ensure the leaf is only output once.
465 Leaves.erase(It);
466 }
607 It->second = 0;
608 // Glob all constants together into Cst.
609 if (Constant *C = dyn_cast(V)) {
610 C = EvaluateRepeatedConstant(Opcode, C, Weight);
611 Cst = Cst ? ConstantExpr::get(Opcode, Cst, C) : C;
612 continue;
613 }
614 // Add non-constant
615 Ops.push_back(std::make_pair(V, Weight));
616 }
617
618 // Add any constants back into Ops, all globbed together and reduced to having
619 // weight 1 for the convenience of users.
620 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType()))
621 Ops.push_back(std::make_pair(Cst, APInt(Bitwidth, 1)));
622
623 // For nilpotent operations or addition there may be no operands, for example
624 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
625 // in both cases the weight reduces to 0 causing the value to be skipped.
626 if (Ops.empty()) {
627 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
628 Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1)));
629 }
630
631 return MadeChange;
467632 }
468633
469634 // RewriteExprTree - Now that the operands for this expression tree are
774939 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
775940 if (!BO) return 0;
776941
942 SmallVector Tree;
943 MadeChange |= LinearizeExprTree(BO, Tree);
777944 SmallVector Factors;
778 LinearizeExprTree(BO, Factors);
945 Factors.reserve(Tree.size());
946 for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
947 RepeatedValue E = Tree[i];
948 Factors.append(E.second.getZExtValue(),
949 ValueEntry(getRank(E.first), E.first));
950 }
779951
780952 bool FoundFactor = false;
781953 bool NeedsNegate = false;
14381610
14391611 // First, walk the expression tree, linearizing the tree, collecting the
14401612 // operand information.
1613 SmallVector Tree;
1614 MadeChange |= LinearizeExprTree(I, Tree);
14411615 SmallVector Ops;
1442 LinearizeExprTree(I, Ops);
1616 Ops.reserve(Tree.size());
1617 for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
1618 RepeatedValue E = Tree[i];
1619 Ops.append(E.second.getZExtValue(),
1620 ValueEntry(getRank(E.first), E.first));
1621 }
14431622
14441623 DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
14451624
20062006 isExact ? PossiblyExactOperator::IsExact : 0);
20072007 }
20082008
2009 /// getBinOpIdentity - Return the identity for the given binary operation,
2010 /// i.e. a constant C such that X op C = X and C op X = X for every X. It
2011 /// is an error to call this for an operation that doesn't have an identity.
2012 Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty) {
2013 switch (Opcode) {
2014 default:
2015 llvm_unreachable("Not a binary operation with identity");
2016 case Instruction::Add:
2017 case Instruction::Or:
2018 case Instruction::Xor:
2019 return Constant::getNullValue(Ty); // X + 0, X | 0 and X ^ 0 all equal X.
2020
2021 case Instruction::Mul:
2022 return ConstantInt::get(Ty, 1); // X * 1 equals X.
2023
2024 case Instruction::And:
2025 return Constant::getAllOnesValue(Ty); // X & ~0 equals X.
2026 }
2027 }
2028
20092029 // destroyConstant - Remove the constant from the constant table...
20102030 //
20112031 void ConstantExpr::destroyConstant() {
394394 }
395395 }
396396
397 /// isIdempotent - Return true if the operation with the given opcode is idempotent:
398 ///
399 /// Idempotent operators satisfy: x op x === x
400 ///
401 /// In LLVM, the And and Or operators are idempotent.
402 ///
403 bool Instruction::isIdempotent(unsigned Opcode) {
404 return Opcode == And || Opcode == Or;
405 }
406
407 /// isNilpotent - Return true if the operation with the given opcode is nilpotent:
408 ///
409 /// Nilpotent operators satisfy: x op x === Id,
410 ///
411 /// where Id is the identity for the operator, i.e. a constant such that
412 /// x op Id === x and Id op x === x for all x.
413 ///
414 /// In LLVM, the Xor operator is nilpotent.
415 ///
416 bool Instruction::isNilpotent(unsigned Opcode) {
417 return Opcode == Xor;
418 }
419
397420 Instruction *Instruction::clone() const {
398421 Instruction *New = clone_impl();
399422 New->SubclassOptionalData = SubclassOptionalData;
0 ; RUN: opt < %s -reassociate -S | FileCheck %s
1
2 ; Tests involving repeated operations on the same value.
3
4 define i8 @nilpotent(i8 %x) {
5 ; CHECK: @nilpotent
6 %tmp = xor i8 %x, %x
7 ret i8 %tmp
8 ; CHECK: ret i8 0
9 }
10
11 define i2 @idempotent(i2 %x) {
12 ; CHECK: @idempotent
13 %tmp1 = and i2 %x, %x
14 %tmp2 = and i2 %tmp1, %x
15 %tmp3 = and i2 %tmp2, %x
16 ret i2 %tmp3
17 ; CHECK: ret i2 %x
18 }
19
20 define i2 @add(i2 %x) {
21 ; CHECK: @add
22 %tmp1 = add i2 %x, %x
23 %tmp2 = add i2 %tmp1, %x
24 %tmp3 = add i2 %tmp2, %x
25 ret i2 %tmp3
26 ; CHECK: ret i2 0
27 }
28
29 define i2 @cst_add() {
30 ; CHECK: @cst_add
31 %tmp1 = add i2 1, 1
32 %tmp2 = add i2 %tmp1, 1
33 ret i2 %tmp2
34 ; CHECK: ret i2 -1
35 }
36
37 define i8 @cst_mul() {
38 ; CHECK: @cst_mul
39 %tmp1 = mul i8 3, 3
40 %tmp2 = mul i8 %tmp1, 3
41 %tmp3 = mul i8 %tmp2, 3
42 %tmp4 = mul i8 %tmp3, 3
43 ret i8 %tmp4
44 ; CHECK: ret i8 -13
45 }
46
47 define i3 @foo3x5(i3 %x) {
48 ; Can be done with two multiplies.
49 ; CHECK: @foo3x5
50 ; CHECK-NEXT: mul
51 ; CHECK-NEXT: mul
52 ; CHECK-NEXT: ret
53 %tmp1 = mul i3 %x, %x
54 %tmp2 = mul i3 %tmp1, %x
55 %tmp3 = mul i3 %tmp2, %x
56 %tmp4 = mul i3 %tmp3, %x
57 ret i3 %tmp4
58 }
59
60 define i3 @foo3x6(i3 %x) {
61 ; Can be done with two multiplies.
62 ; CHECK: @foo3x6
63 ; CHECK-NEXT: mul
64 ; CHECK-NEXT: mul
65 ; CHECK-NEXT: ret
66 %tmp1 = mul i3 %x, %x
67 %tmp2 = mul i3 %tmp1, %x
68 %tmp3 = mul i3 %tmp2, %x
69 %tmp4 = mul i3 %tmp3, %x
70 %tmp5 = mul i3 %tmp4, %x
71 ret i3 %tmp5
72 }
73
74 define i3 @foo3x7(i3 %x) {
75 ; Can be done with two multiplies.
76 ; CHECK: @foo3x7
77 ; CHECK-NEXT: mul
78 ; CHECK-NEXT: mul
79 ; CHECK-NEXT: ret
80 %tmp1 = mul i3 %x, %x
81 %tmp2 = mul i3 %tmp1, %x
82 %tmp3 = mul i3 %tmp2, %x
83 %tmp4 = mul i3 %tmp3, %x
84 %tmp5 = mul i3 %tmp4, %x
85 %tmp6 = mul i3 %tmp5, %x
86 ret i3 %tmp6
87 }
88
89 define i4 @foo4x8(i4 %x) {
90 ; Can be done with two multiplies.
91 ; CHECK: @foo4x8
92 ; CHECK-NEXT: mul
93 ; CHECK-NEXT: mul
94 ; CHECK-NEXT: ret
95 %tmp1 = mul i4 %x, %x
96 %tmp2 = mul i4 %tmp1, %x
97 %tmp3 = mul i4 %tmp2, %x
98 %tmp4 = mul i4 %tmp3, %x
99 %tmp5 = mul i4 %tmp4, %x
100 %tmp6 = mul i4 %tmp5, %x
101 %tmp7 = mul i4 %tmp6, %x
102 ret i4 %tmp7
103 }
104
105 define i4 @foo4x9(i4 %x) {
106 ; Can be done with three multiplies.
107 ; CHECK: @foo4x9
108 ; CHECK-NEXT: mul
109 ; CHECK-NEXT: mul
110 ; CHECK-NEXT: mul
111 ; CHECK-NEXT: ret
112 %tmp1 = mul i4 %x, %x
113 %tmp2 = mul i4 %tmp1, %x
114 %tmp3 = mul i4 %tmp2, %x
115 %tmp4 = mul i4 %tmp3, %x
116 %tmp5 = mul i4 %tmp4, %x
117 %tmp6 = mul i4 %tmp5, %x
118 %tmp7 = mul i4 %tmp6, %x
119 %tmp8 = mul i4 %tmp7, %x
120 ret i4 %tmp8
121 }
122
123 define i4 @foo4x10(i4 %x) {
124 ; Can be done with three multiplies.
125 ; CHECK: @foo4x10
126 ; CHECK-NEXT: mul
127 ; CHECK-NEXT: mul
128 ; CHECK-NEXT: mul
129 ; CHECK-NEXT: ret
130 %tmp1 = mul i4 %x, %x
131 %tmp2 = mul i4 %tmp1, %x
132 %tmp3 = mul i4 %tmp2, %x
133 %tmp4 = mul i4 %tmp3, %x
134 %tmp5 = mul i4 %tmp4, %x
135 %tmp6 = mul i4 %tmp5, %x
136 %tmp7 = mul i4 %tmp6, %x
137 %tmp8 = mul i4 %tmp7, %x
138 %tmp9 = mul i4 %tmp8, %x
139 ret i4 %tmp9
140 }
141
142 define i4 @foo4x11(i4 %x) {
143 ; Can be done with four multiplies.
144 ; CHECK: @foo4x11
145 ; CHECK-NEXT: mul
146 ; CHECK-NEXT: mul
147 ; CHECK-NEXT: mul
148 ; CHECK-NEXT: mul
149 ; CHECK-NEXT: ret
150 %tmp1 = mul i4 %x, %x
151 %tmp2 = mul i4 %tmp1, %x
152 %tmp3 = mul i4 %tmp2, %x
153 %tmp4 = mul i4 %tmp3, %x
154 %tmp5 = mul i4 %tmp4, %x
155 %tmp6 = mul i4 %tmp5, %x
156 %tmp7 = mul i4 %tmp6, %x
157 %tmp8 = mul i4 %tmp7, %x
158 %tmp9 = mul i4 %tmp8, %x
159 %tmp10 = mul i4 %tmp9, %x
160 ret i4 %tmp10
161 }
162
163 define i4 @foo4x12(i4 %x) {
164 ; Can be done with two multiplies.
165 ; CHECK: @foo4x12
166 ; CHECK-NEXT: mul
167 ; CHECK-NEXT: mul
168 ; CHECK-NEXT: ret
169 %tmp1 = mul i4 %x, %x
170 %tmp2 = mul i4 %tmp1, %x
171 %tmp3 = mul i4 %tmp2, %x
172 %tmp4 = mul i4 %tmp3, %x
173 %tmp5 = mul i4 %tmp4, %x
174 %tmp6 = mul i4 %tmp5, %x
175 %tmp7 = mul i4 %tmp6, %x
176 %tmp8 = mul i4 %tmp7, %x
177 %tmp9 = mul i4 %tmp8, %x
178 %tmp10 = mul i4 %tmp9, %x
179 %tmp11 = mul i4 %tmp10, %x
180 ret i4 %tmp11
181 }
182
183 define i4 @foo4x13(i4 %x) {
184 ; Can be done with three multiplies.
185 ; CHECK: @foo4x13
186 ; CHECK-NEXT: mul
187 ; CHECK-NEXT: mul
188 ; CHECK-NEXT: mul
189 ; CHECK-NEXT: ret
190 %tmp1 = mul i4 %x, %x
191 %tmp2 = mul i4 %tmp1, %x
192 %tmp3 = mul i4 %tmp2, %x
193 %tmp4 = mul i4 %tmp3, %x
194 %tmp5 = mul i4 %tmp4, %x
195 %tmp6 = mul i4 %tmp5, %x
196 %tmp7 = mul i4 %tmp6, %x
197 %tmp8 = mul i4 %tmp7, %x
198 %tmp9 = mul i4 %tmp8, %x
199 %tmp10 = mul i4 %tmp9, %x
200 %tmp11 = mul i4 %tmp10, %x
201 %tmp12 = mul i4 %tmp11, %x
202 ret i4 %tmp12
203 }
204
205 define i4 @foo4x14(i4 %x) {
206 ; Can be done with three multiplies.
207 ; CHECK: @foo4x14
208 ; CHECK-NEXT: mul
209 ; CHECK-NEXT: mul
210 ; CHECK-NEXT: mul
211 ; CHECK-NEXT: ret
212 %tmp1 = mul i4 %x, %x
213 %tmp2 = mul i4 %tmp1, %x
214 %tmp3 = mul i4 %tmp2, %x
215 %tmp4 = mul i4 %tmp3, %x
216 %tmp5 = mul i4 %tmp4, %x
217 %tmp6 = mul i4 %tmp5, %x
218 %tmp7 = mul i4 %tmp6, %x
219 %tmp8 = mul i4 %tmp7, %x
220 %tmp9 = mul i4 %tmp8, %x
221 %tmp10 = mul i4 %tmp9, %x
222 %tmp11 = mul i4 %tmp10, %x
223 %tmp12 = mul i4 %tmp11, %x
224 %tmp13 = mul i4 %tmp12, %x
225 ret i4 %tmp13
226 }
227
228 define i4 @foo4x15(i4 %x) {
229 ; Can be done with four multiplies.
230 ; CHECK: @foo4x15
231 ; CHECK-NEXT: mul
232 ; CHECK-NEXT: mul
233 ; CHECK-NEXT: mul
234 ; CHECK-NEXT: mul
235 ; CHECK-NEXT: ret
236 %tmp1 = mul i4 %x, %x
237 %tmp2 = mul i4 %tmp1, %x
238 %tmp3 = mul i4 %tmp2, %x
239 %tmp4 = mul i4 %tmp3, %x
240 %tmp5 = mul i4 %tmp4, %x
241 %tmp6 = mul i4 %tmp5, %x
242 %tmp7 = mul i4 %tmp6, %x
243 %tmp8 = mul i4 %tmp7, %x
244 %tmp9 = mul i4 %tmp8, %x
245 %tmp10 = mul i4 %tmp9, %x
246 %tmp11 = mul i4 %tmp10, %x
247 %tmp12 = mul i4 %tmp11, %x
248 %tmp13 = mul i4 %tmp12, %x
249 %tmp14 = mul i4 %tmp13, %x
250 ret i4 %tmp14
251 }