llvm.org GIT mirror llvm / 05488db
This implements a more optimal algorithm for selecting a base constant in constant hoisting. It not only takes into account the number of uses and the cost of expressions in which constants appear, but now also the resulting integer range of the offsets. Thus, the algorithm maximizes the number of uses within an integer range that will enable more efficient code generation. On ARM, for example, this will enable code size optimisations because fewer negative offsets will be created. Negative offsets/immediates are not supported by Thumb1, which prevents a more compact instruction encoding. Differential Revision: http://reviews.llvm.org/D21183 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275382 91177308-0d34-0410-b5e6-96231b3b80d8 Sjoerd Meijer 4 years ago
8 changed file(s) with 186 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
413413 Type *Ty) const;
414414 int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
415415 Type *Ty) const;
416
417 /// \brief Return the expected cost for the given integer when optimising
418 /// for size. This is different from the other integer immediate cost
419 /// functions in that it is subtarget agnostic. This is useful when you e.g.
420 /// target one ISA such as AArch32 but smaller encodings could be possible
421 /// with another such as Thumb. This return value is used as a penalty when
422 /// the total cost for a constant is calculated (the bigger the cost, the
423 /// more beneficial constant hoisting is).
424 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
425 Type *Ty) const;
416426 /// @}
417427
418428 /// \name Vector Target Information
664674 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
665675 virtual bool haveFastSqrt(Type *Ty) = 0;
666676 virtual int getFPOpCost(Type *Ty) = 0;
677 virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
678 Type *Ty) = 0;
667679 virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
668680 virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
669681 Type *Ty) = 0;
840852
841853 int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
842854
// Forwards the code-size immediate cost query unchanged to the wrapped
// implementation object (Impl) and returns its result.
855 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
856 Type *Ty) override {
857 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
858 }
843859 int getIntImmCost(const APInt &Imm, Type *Ty) override {
844860 return Impl.getIntImmCost(Imm, Ty);
845861 }
255255 bool haveFastSqrt(Type *Ty) { return false; }
256256
257257 unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
258
// Default implementation: reports a code-size cost of 0 for every immediate,
// regardless of opcode, operand index, value or type (all parameters are
// ignored).
259 int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
260 Type *Ty) {
261 return 0;
262 }
258263
259264 unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
260265
133133 void collectConstantCandidates(Function &Fn);
134134 void findAndMakeBaseConstant(ConstCandVecType::iterator S,
135135 ConstCandVecType::iterator E);
136 unsigned maximizeConstantsInRange(ConstCandVecType::iterator S,
137 ConstCandVecType::iterator E,
138 ConstCandVecType::iterator &MaxCostItr);
136139 void findBaseConstants();
137140 void emitBaseConstants(Instruction *Base, Constant *Offset,
138141 const consthoist::ConstantUser &ConstUser);
208208 return Cost;
209209 }
210210
211 int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
212 const APInt &Imm,
213 Type *Ty) const {
214 int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
215 assert(Cost >= 0 && "TTI should not produce negative costs!");
216 return Cost;
217 }
218
211219 int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
212220 int Cost = TTIImpl->getIntImmCost(Imm, Ty);
213221 assert(Cost >= 0 && "TTI should not produce negative costs!");
4444 return 2;
4545 // Load from constantpool.
4646 return 3;
47 }
48
49
50 // Constants smaller than 256 fit in the immediate field of
51 // Thumb1 instructions so we return a zero cost and 1 otherwise.
52 int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
53 const APInt &Imm, Type *Ty) {
54 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
55 return 0;
56
57 return 1;
4758 }
4859
4960 int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
6262
6363 /// \name Scalar TTI Implementations
6464 /// @{
65
66 int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
67 Type *Ty);
6568
6669 using BaseT::getIntImmCost;
6770 int getIntImmCost(const APInt &Imm, Type *Ty);
284284 collectConstantCandidates(ConstCandMap, &Inst);
285285 }
286286
287 // This helper function is necessary to deal with values that have different
288 // bit widths (APInt Operator- does not like that). If the value cannot be
289 // represented in uint64 we return an "empty" APInt. This is then interpreted
290 // as the value is not in range.
291 static llvm::Optional calculateOffsetDiff(APInt V1, APInt V2)
292 {
293 llvm::Optional Res = None;
294 unsigned BW = V1.getBitWidth() > V2.getBitWidth() ?
295 V1.getBitWidth() : V2.getBitWidth();
296 uint64_t LimVal1 = V1.getLimitedValue();
297 uint64_t LimVal2 = V2.getLimitedValue();
298
299 if (LimVal1 == ~0ULL || LimVal2 == ~0ULL)
300 return Res;
301
302 uint64_t Diff = LimVal1 - LimVal2;
303 return APInt(BW, Diff, true);
304 }
305
306 // From a list of constants, one needs to picked as the base and the other
307 // constants will be transformed into an offset from that base constant. The
308 // question is which we can pick best? For example, consider these constants
309 // and their number of uses:
310 //
311 // Constants| 2 | 4 | 12 | 42 |
312 // NumUses | 3 | 2 | 8 | 7 |
313 //
314 // Selecting constant 12 because it has the most uses will generate negative
315 // offsets for constants 2 and 4 (i.e. -10 and -8 respectively). If negative
316 // offsets lead to less optimal code generation, then there might be better
317 // solutions. Suppose immediates in the range of 0..35 are most optimally
318 // supported by the architecture, then selecting constant 2 is most optimal
319 // because this will generate offsets: 0, 2, 10, 40. Offsets 0, 2 and 10 are in
320 // range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would
321 // have only 8 uses in range, so choosing 2 as a base is more optimal. Thus, in
322 // selecting the base constant the range of the offsets is a very important
323 // factor too that we take into account here. This algorithm calculates a total
324 // costs for selecting a constant as the base and substract the costs if
325 // immediates are out of range. It has quadratic complexity, so we call this
326 // function only when we're optimising for size and there are less than 100
327 // constants, we fall back to the straightforward algorithm otherwise
328 // which does not do all the offset calculations.
329 unsigned
330 ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
331 ConstCandVecType::iterator E,
332 ConstCandVecType::iterator &MaxCostItr) {
333 unsigned NumUses = 0;
334
335 if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) {
336 for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
337 NumUses += ConstCand->Uses.size();
338 if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
339 MaxCostItr = ConstCand;
340 }
341 return NumUses;
342 }
343
344 DEBUG(dbgs() << "== Maximize constants in range ==\n");
345 int MaxCost = -1;
346 for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
347 auto Value = ConstCand->ConstInt->getValue();
348 Type *Ty = ConstCand->ConstInt->getType();
349 int Cost = 0;
350 NumUses += ConstCand->Uses.size();
351 DEBUG(dbgs() << "= Constant: " << ConstCand->ConstInt->getValue() << "\n");
352
353 for (auto User : ConstCand->Uses) {
354 unsigned Opcode = User.Inst->getOpcode();
355 unsigned OpndIdx = User.OpndIdx;
356 Cost += TTI->getIntImmCost(Opcode, OpndIdx, Value, Ty);
357 DEBUG(dbgs() << "Cost: " << Cost << "\n");
358
359 for (auto C2 = S; C2 != E; ++C2) {
360 llvm::Optional Diff = calculateOffsetDiff(
361 C2->ConstInt->getValue(),
362 ConstCand->ConstInt->getValue());
363 if (Diff) {
364 const int ImmCosts =
365 TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty);
366 Cost -= ImmCosts;
367 DEBUG(dbgs() << "Offset " << Diff.getValue() << " "
368 << "has penalty: " << ImmCosts << "\n"
369 << "Adjusted cost: " << Cost << "\n");
370 }
371 }
372 }
373 DEBUG(dbgs() << "Cumulative cost: " << Cost << "\n");
374 if (Cost > MaxCost) {
375 MaxCost = Cost;
376 MaxCostItr = ConstCand;
377 DEBUG(dbgs() << "New candidate: " << MaxCostItr->ConstInt->getValue()
378 << "\n");
379 }
380 }
381 return NumUses;
382 }
383
287384 /// \brief Find the base constant within the given range and rebase all other
288385 /// constants with respect to the base constant.
289386 void ConstantHoistingPass::findAndMakeBaseConstant(
290387 ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
291388 auto MaxCostItr = S;
292 unsigned NumUses = 0;
293 // Use the constant that has the maximum cost as base constant.
294 for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
295 NumUses += ConstCand->Uses.size();
296 if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
297 MaxCostItr = ConstCand;
298 }
389 unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);
299390
300391 // Don't hoist constants that have only one use.
301392 if (NumUses <= 1)
0 ; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
1
2 ; There are different candidates here for the base constant: 1073876992 and
3 ; 1073876996. We don't want the latter to be chosen, because rebasing
4 ; 1073876992 on it would produce a negative offset (-4).
5
6 define void @foo() #0 {
7 entry:
8 ; CHECK-LABEL: @foo
9 ; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
10 %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
11 %or = or i32 %0, 1
12 store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
13 %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
14 %and = and i32 %1, -117506048
15 store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
16 %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
17 %and1 = and i32 %2, -17367041
18 store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
19 %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
20 %and2 = and i32 %3, -262145
21 store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
22 %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
23 %and3 = and i32 %4, -8323073
24 store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
25 store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
26 %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
27 %or4 = or i32 %5, 65536
28 store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
29 %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
30 %or6.i.i = or i32 %6, 16
31 store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
32 %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
33 %and7.i.i = and i32 %7, -4
34 store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
35 %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
36 %or8.i.i = or i32 %8, 2
37 store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
38 ret void
39 }
40
41 attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }