llvm / fcb98b7
[ARM] Add OptMinSize to ARMSubtarget

In many places in the backend we want to know whether we're optimising for
code size, and this is currently done by checking the current machine
function's attributes. A subtarget is created on a per-function basis, so we
can know at construction time whether we're compiling for code size, and we
record this in the new object.

Differential Revision: https://reviews.llvm.org/D57812

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353501 91177308-0d34-0410-b5e6-96231b3b80d8

Sam Parker, 1 year, 7 months ago
15 changed files with 63 additions and 50 deletions.
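The core idea is that the per-function subtarget can capture the minsize attribute once, at construction, and the subtarget cache is then keyed on that attribute as well as on the CPU/feature string. The following self-contained C++ sketch illustrates the pattern with simplified stand-in types; Function, Subtarget and TargetMachine here are hypothetical miniatures, not LLVM's actual classes.

// Hypothetical miniature of the pattern this patch applies; not LLVM's real classes.
#include <map>
#include <memory>
#include <string>

struct Function {
  bool MinSize = false;                  // stands in for the minsize IR attribute
  bool optForMinSize() const { return MinSize; }
};

class Subtarget {
  bool OptMinSize;                       // recorded once, at construction
public:
  explicit Subtarget(bool MinSize) : OptMinSize(MinSize) {}
  // Callers no longer need to reach back to the function's attributes.
  bool optForMinSize() const { return OptMinSize; }
  bool useMovt() const { return !OptMinSize; } // simplified stand-in policy
};

class TargetMachine {
  std::map<std::string, std::unique_ptr<Subtarget>> SubtargetMap;
public:
  const Subtarget &getSubtargetImpl(const Function &F, const std::string &CPUFS) {
    // Key the cache on minsize too, so functions that share a CPU/feature
    // string but differ in the attribute get distinct subtargets.
    std::string Key = CPUFS;
    if (F.optForMinSize())
      Key += "+minsize";
    auto &I = SubtargetMap[Key];
    if (!I)
      I = std::make_unique<Subtarget>(F.optForMinSize());
    return *I;
  }
};

With the flag cached in the subtarget, queries such as useMovt() no longer need a MachineFunction parameter, which is exactly the API simplification the hunks below make.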
@@ -2264,7 +2264,7 @@
                               unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction().optForMinSize())
+  if (!Subtarget.optForMinSize())
     return false;

   // If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -496,7 +496,7 @@
   }

   unsigned ResultReg = 0;
-  if (Subtarget->useMovt(*FuncInfo.MF))
+  if (Subtarget->useMovt())
     ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

   if (ResultReg)
@@ -554,7 +554,7 @@
   bool IsPositionIndependent = isPositionIndependent();
   // Use movw+movt when possible, it avoids constant pool entries.
   // Non-darwin targets only support static movt relocations in FastISel.
-  if (Subtarget->useMovt(*FuncInfo.MF) &&
+  if (Subtarget->useMovt() &&
       (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
     unsigned Opc;
     unsigned char TF = 0;
@@ -464,7 +464,7 @@
     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
   }
-  if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
   return 3; // Literal pool load
 }

@@ -2068,7 +2068,7 @@
     auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
     auto *BB = CLI.CS.getParent();
     bool PreferIndirect =
-        Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
+        Subtarget->isThumb() && Subtarget->optForMinSize() &&
         count_if(GV->users(), [&BB](const User *U) {
           return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
         }) > 2;
@@ -2140,7 +2140,7 @@
       CallOpc = ARMISD::CALL_NOLINK;
     else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
              // Emit regular call when code size is the priority
-             !MF.getFunction().optForMinSize())
+             !Subtarget->optForMinSize())
       // "mov lr, pc; b _foo" to avoid confusing the RSP
       CallOpc = ARMISD::CALL_NOLINK;
     else
@@ -3223,7 +3223,7 @@
   } else if (Subtarget->isRWPI() && !IsRO) {
     // SB-relative.
     SDValue RelAddr;
-    if (Subtarget->useMovt(DAG.getMachineFunction())) {
+    if (Subtarget->useMovt()) {
       ++NumMovwMovt;
       SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
       RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
@@ -3243,7 +3243,7 @@

   // If we have T2 ops, we can materialize the address directly via movt/movw
   // pair. This is always cheaper.
-  if (Subtarget->useMovt(DAG.getMachineFunction())) {
+  if (Subtarget->useMovt()) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
@@ -3266,7 +3266,7 @@
   SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

-  if (Subtarget->useMovt(DAG.getMachineFunction()))
+  if (Subtarget->useMovt())
     ++NumMovwMovt;

   // FIXME: Once remat is capable of dealing with instructions with register
@@ -3286,7 +3286,7 @@
 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                      SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
-  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+  assert(Subtarget->useMovt() &&
          "Windows on ARM expects to use movw/movt");
   assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
          "ROPI/RWPI not currently supported for Windows");
@@ -7807,8 +7807,7 @@
     return SDValue();

   const auto &ST = static_cast<const ARMSubtarget &>(DAG.getSubtarget());
-  const auto &MF = DAG.getMachineFunction();
-  const bool MinSize = MF.getFunction().optForMinSize();
+  const bool MinSize = ST.optForMinSize();
   const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
                                       : ST.hasDivideInARMMode();

@@ -8978,7 +8977,7 @@

   // Load an immediate to varEnd.
   unsigned varEnd = MRI.createVirtualRegister(TRC);
-  if (Subtarget->useMovt(*MF)) {
+  if (Subtarget->useMovt()) {
     unsigned Vtmp = varEnd;
     if ((LoopSize & 0xFFFF0000) != 0)
       Vtmp = MRI.createVirtualRegister(TRC);
@@ -14713,6 +14712,10 @@
   return Subtarget->hasV6T2Ops();
 }

+bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+  return !Subtarget->optForMinSize();
+}
+
 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -566,11 +566,7 @@
     return HasStandaloneRem;
   }

-  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
-    if (DAG.getMachineFunction().getFunction().optForMinSize())
-      return false;
-    return true;
-  }
+  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
@@ -93,7 +93,7 @@
   const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
   const TargetMachine &TM = MF.getTarget();

-  if (!Subtarget.useMovt(MF)) {
+  if (!Subtarget.useMovt()) {
     if (TM.isPositionIndependent())
       expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
     else
@@ -353,14 +353,14 @@

 // FIXME: Eventually this will be just "hasV6T2Ops".
 let RecomputePerFunction = 1 in {
-def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
-def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
-def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseMovtInPic : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
+def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;

 def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
                          " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
-                         "MF->getFunction().optForMinSize())">;
+                         "Subtarget->optForMinSize())">;
 }
 def UseMulOps : Predicate<"Subtarget->useMulOps()">;

@@ -717,15 +717,14 @@

 /// arm_i32imm - True for +V6T2, or when isSOImmTwoPartVal()
 def arm_i32imm : PatLeaf<(imm), [{
-  if (Subtarget->useMovt(*MF))
+  if (Subtarget->useMovt())
     return true;
   return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
 }]> {
   // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
   // the MachineFunction.
   let GISelPredicateCode = [{
-    const auto &MF = *MI.getParent()->getParent();
-    if (STI.useMovt(MF))
+    if (STI.useMovt())
       return true;

     const auto &MO = MI.getOperand(1);
@@ -580,7 +580,7 @@
   auto &MBB = *MIB->getParent();
   auto &MF = *MBB.getParent();

-  bool UseMovt = STI.useMovt(MF);
+  bool UseMovt = STI.useMovt();

   unsigned Size = TM.getPointerSize(0);
   unsigned Alignment = 4;
@@ -1285,7 +1285,7 @@
   // can still change to a writeback form as that will save us 2 bytes
   // of code size. It can create WAW hazards though, so only do it if
   // we're minimizing code size.
-  if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
+  if (!STI->optForMinSize() || !BaseKill)
     return false;

   bool HighRegsUsed = false;
@@ -300,7 +300,7 @@
                              (decimate (rotl SPR, 1), 4),
                              (decimate (rotl SPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -312,7 +312,7 @@
                              (decimate (rotl HPR, 1), 4),
                              (decimate (rotl HPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -334,7 +334,7 @@
   let AltOrders = [(rotl DPR, 16),
                    (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticType = "DPR";
 }
@@ -169,7 +169,7 @@

   // Code size optimisation: do not inline memcpy if expansion results in
   // more instructions than the library call.
-  if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
+  if (NumMEMCPYs > 1 && Subtarget.optForMinSize()) {
     return SDValue();
   }

@@ -91,10 +91,12 @@

 ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
-                           const ARMBaseTargetMachine &TM, bool IsLittle)
+                           const ARMBaseTargetMachine &TM, bool IsLittle,
+                           bool MinSize)
     : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
-      CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
-      TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
+      TargetTriple(TT), Options(TM.Options), TM(TM),
+      FrameLowering(initializeFrameLowering(CPU, FS)),
       // At this point initializeSubtargetDependencies has been called so
       // we can query directly.
       InstrInfo(isThumb1Only()
@@ -372,20 +374,20 @@

 bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }

-bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+bool ARMSubtarget::useStride4VFPs() const {
   // For general targets, the prologue can grow when VFPs are allocated with
   // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
   // format which it's more important to get right.
   return isTargetWatchABI() ||
-         (useWideStrideVFP() && !MF.getFunction().optForMinSize());
+         (useWideStrideVFP() && !OptMinSize);
 }

-bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+bool ARMSubtarget::useMovt() const {
   // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
   // immediates as it is inherently position independent, and may be out of
   // range otherwise.
   return !NoMovt && hasV8MBaselineOps() &&
-         (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly());
+         (isTargetWindows() || !OptMinSize || genExecuteOnly());
 }

 bool ARMSubtarget::useFastISel() const {
@@ -444,6 +444,10 @@
   /// What alignment is preferred for loop bodies, in log2(bytes).
   unsigned PrefLoopAlignment = 0;

+  /// OptMinSize - True if we're optimising for minimum code size, equal to
+  /// the function attribute.
+  bool OptMinSize = false;
+
   /// IsLittle - The target is Little Endian
   bool IsLittle;

@@ -466,7 +470,8 @@
   /// of the specified triple.
   ///
   ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
-               const ARMBaseTargetMachine &TM, bool IsLittle);
+               const ARMBaseTargetMachine &TM, bool IsLittle,
+               bool MinSize = false);

   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
@@ -708,6 +713,7 @@
   bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
+  bool optForMinSize() const { return OptMinSize; }
   bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
   bool isThumb2() const { return InThumbMode && HasThumb2; }
   bool hasThumb2() const { return HasThumb2; }
@@ -734,9 +740,9 @@
            isThumb1Only();
   }

-  bool useStride4VFPs(const MachineFunction &MF) const;
-
-  bool useMovt(const MachineFunction &MF) const;
+  bool useStride4VFPs() const;
+
+  bool useMovt() const;

   bool supportsTailCall() const { return SupportsTailCall; }

@@ -262,13 +262,20 @@
   if (SoftFloat)
     FS += FS.empty() ? "+soft-float" : ",+soft-float";

-  auto &I = SubtargetMap[CPU + FS];
+  // Use the optminsize attribute to identify the subtarget, but don't add it
+  // to the feature string.
+  std::string Key = CPU + FS;
+  if (F.optForMinSize())
+    Key += "+minsize";
+
+  auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
     // creation will depend on the TM and the code generation flags on the
     // function that reside in TargetOptions.
     resetTargetOptions(F);
-    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
+                                        F.optForMinSize());

     if (!I->isThumb() && !I->hasARMOps())
       F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
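To see why the cache key must include the attribute: two functions with identical CPU/feature strings but different minsize attributes must not share a subtarget, or the cached flag would be wrong for one of them. A hypothetical driver for the miniature sketch above (the "cortex-a53" string is an arbitrary example key, not taken from the patch):

#include <cassert>

int main() {
  TargetMachine TM;
  Function Hot;                         // no minsize attribute
  Function Cold;
  Cold.MinSize = true;                  // carries the minsize attribute
  const Subtarget &A = TM.getSubtargetImpl(Hot, "cortex-a53");
  const Subtarget &B = TM.getSubtargetImpl(Cold, "cortex-a53");
  assert(&A != &B && "same CPU/FS but different minsize must not share");
  assert(!A.optForMinSize() && B.optForMinSize());
  return 0;
}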
@@ -452,7 +452,7 @@
     break;
   case ARM::t2LDR_POST:
   case ARM::t2STR_POST: {
-    if (!MBB.getParent()->getFunction().optForMinSize())
+    if (!MinimizeSize)
       return false;

     if (!MI->hasOneMemOperand() ||
@@ -1127,7 +1127,7 @@

   // Optimizing / minimizing size? Minimizing size implies optimizing for size.
   OptimizeSize = MF.getFunction().optForSize();
-  MinimizeSize = MF.getFunction().optForMinSize();
+  MinimizeSize = STI->optForMinSize();

   BlockInfo.clear();
   BlockInfo.resize(MF.getNumBlockIDs());