CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
43#include "llvm/Config/llvm-config.h"
44#include "llvm/IR/Argument.h"
45#include "llvm/IR/Attributes.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/Constant.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DebugInfo.h"
52#include "llvm/IR/Dominators.h"
53#include "llvm/IR/Function.h"
55#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/InlineAsm.h"
59#include "llvm/IR/InstrTypes.h"
60#include "llvm/IR/Instruction.h"
63#include "llvm/IR/Intrinsics.h"
64#include "llvm/IR/IntrinsicsAArch64.h"
65#include "llvm/IR/LLVMContext.h"
66#include "llvm/IR/MDBuilder.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/Operator.h"
71#include "llvm/IR/Statepoint.h"
72#include "llvm/IR/Type.h"
73#include "llvm/IR/Use.h"
74#include "llvm/IR/User.h"
75#include "llvm/IR/Value.h"
76#include "llvm/IR/ValueHandle.h"
77#include "llvm/IR/ValueMap.h"
79#include "llvm/Pass.h"
85#include "llvm/Support/Debug.h"
96#include <algorithm>
97#include <cassert>
98#include <cstdint>
99#include <iterator>
100#include <limits>
101#include <memory>
102#include <optional>
103#include <utility>
104#include <vector>
105
106using namespace llvm;
107using namespace llvm::PatternMatch;
108
109#define DEBUG_TYPE "codegenprepare"
110
111STATISTIC(NumBlocksElim, "Number of blocks eliminated");
112STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
113STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
114STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
115 "sunken Cmps");
116STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
117 "of sunken Casts");
118STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
119 "computations were sunk");
120STATISTIC(NumMemoryInstsPhiCreated,
121 "Number of phis created when address "
122 "computations were sunk to memory instructions");
123STATISTIC(NumMemoryInstsSelectCreated,
124 "Number of select created when address "
125 "computations were sunk to memory instructions");
126STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
127STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
128STATISTIC(NumAndsAdded,
129 "Number of and mask instructions added to form ext loads");
130STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
131STATISTIC(NumRetsDup, "Number of return instructions duplicated");
132STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
133STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
134STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
135
137 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
138 cl::desc("Disable branch optimizations in CodeGenPrepare"));
139
140static cl::opt<bool>
141 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
142 cl::desc("Disable GC optimizations in CodeGenPrepare"));
143
144static cl::opt<bool>
145 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
146 cl::init(false),
147 cl::desc("Disable select to branch conversion."));
148
149static cl::opt<bool>
150 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
151 cl::desc("Address sinking in CGP using GEPs."));
152
153static cl::opt<bool>
154 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
155 cl::desc("Enable sinking and/cmp into branches."));
156
158 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
159 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
160
162 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
163 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
164
166 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
167 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
168 "CodeGenPrepare"));
169
171 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
172 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
173 "optimization in CodeGenPrepare"));
174
176 "disable-preheader-prot", cl::Hidden, cl::init(false),
177 cl::desc("Disable protection against removing loop preheaders"));
178
180 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
181 cl::desc("Use profile info to add section prefix for hot/cold functions"));
182
184 "profile-unknown-in-special-section", cl::Hidden,
185 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
186 "profile, we cannot tell the function is cold for sure because "
187 "it may be a function newly added without ever being sampled. "
188 "With the flag enabled, compiler can put such profile unknown "
189 "functions into a special section, so runtime system can choose "
190 "to handle it in a different way than .text section, to save "
191 "RAM for example. "));
192
194 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
195 cl::desc("Use the basic-block-sections profile to determine the text "
196 "section prefix for hot functions. Functions with "
197 "basic-block-sections profile will be placed in `.text.hot` "
198 "regardless of their FDO profile info. Other functions won't be "
199 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
200 "profiles."));
201
203 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
204 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
205 "(frequency of destination block) is greater than this ratio"));
206
208 "force-split-store", cl::Hidden, cl::init(false),
209 cl::desc("Force store splitting no matter what the target query says."));
210
212 "cgp-type-promotion-merge", cl::Hidden,
213 cl::desc("Enable merging of redundant sexts when one is dominating"
214 " the other."),
215 cl::init(true));
216
218 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
219 cl::desc("Disables combining addressing modes with different parts "
220 "in optimizeMemoryInst."));
221
222static cl::opt<bool>
223 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
224 cl::desc("Allow creation of Phis in Address sinking."));
225
227 "addr-sink-new-select", cl::Hidden, cl::init(true),
228 cl::desc("Allow creation of selects in Address sinking."));
229
231 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
232 cl::desc("Allow combining of BaseReg field in Address sinking."));
233
235 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
236 cl::desc("Allow combining of BaseGV field in Address sinking."));
237
239 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
240 cl::desc("Allow combining of BaseOffs field in Address sinking."));
241
243 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
244 cl::desc("Allow combining of ScaledReg field in Address sinking."));
245
246static cl::opt<bool>
247 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
248 cl::init(true),
249 cl::desc("Enable splitting large offset of GEP."));
250
252 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
253 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
254
255static cl::opt<bool>
256 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
257 cl::desc("Enable BFI update verification for "
258 "CodeGenPrepare."));
259
260static cl::opt<bool>
261 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
262 cl::desc("Enable converting phi types in CodeGenPrepare"));
263
265 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
266 cl::desc("Minimum number of basic blocks for a function to be considered huge."));
267
269 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271 cl::desc("Max number of address users to look at"));
272
273static cl::opt<bool>
274 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
275 cl::desc("Disable elimination of dead PHI nodes."));
276
277namespace {
278
279enum ExtType {
280 ZeroExtension, // Zero extension has been seen.
281 SignExtension, // Sign extension has been seen.
282 BothExtension // This extension type is used if we saw sext after
283 // ZeroExtension had been set, or if we saw zext after
284 // SignExtension had been set. It makes the type
285 // information of a promoted instruction invalid.
286};
287
288enum ModifyDT {
289  NotModifyDT, // Do not modify any dominator tree.
290  ModifyBBDT,  // Modify the basic block dominator tree.
291  ModifyInstDT // Modify instruction dominance within a basic block.
292               // This usually means we move/delete/insert an instruction
293               // in a basic block, so we should re-iterate the instructions
294               // in such a basic block.
295};
296
297using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
298using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
299using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using ValueToSExts = MapVector<Value *, SExts>;
302
303class TypePromotionTransaction;
304
305class CodeGenPrepare {
306 friend class CodeGenPrepareLegacyPass;
307 const TargetMachine *TM = nullptr;
308 const TargetSubtargetInfo *SubtargetInfo = nullptr;
309 const TargetLowering *TLI = nullptr;
310 const TargetRegisterInfo *TRI = nullptr;
311 const TargetTransformInfo *TTI = nullptr;
312 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
313 const TargetLibraryInfo *TLInfo = nullptr;
314 LoopInfo *LI = nullptr;
315 std::unique_ptr<BlockFrequencyInfo> BFI;
316 std::unique_ptr<BranchProbabilityInfo> BPI;
317 ProfileSummaryInfo *PSI = nullptr;
318
319 /// As we scan instructions optimizing them, this is the next instruction
320 /// to optimize. Transforms that can invalidate this should update it.
321 BasicBlock::iterator CurInstIterator;
322
323 /// Keeps track of non-local addresses that have been sunk into a block.
324 /// This allows us to avoid inserting duplicate code for blocks with
325 /// multiple load/stores of the same address. The usage of WeakTrackingVH
326 /// enables SunkAddrs to be treated as a cache whose entries can be
327 /// invalidated if a sunken address computation has been erased.
329
330 /// Keeps track of all instructions inserted for the current function.
331 SetOfInstrs InsertedInsts;
332
333  /// Keeps track of the type of each related instruction before its
334  /// promotion, for the current function.
335 InstrToOrigTy PromotedInsts;
336
337 /// Keep track of instructions removed during promotion.
338 SetOfInstrs RemovedInsts;
339
340 /// Keep track of sext chains based on their initial value.
341 DenseMap<Value *, Instruction *> SeenChainsForSExt;
342
343 /// Keep track of GEPs accessing the same data structures such as structs or
344 /// arrays that are candidates to be split later because of their large
345 /// size.
348 LargeOffsetGEPMap;
349
350  /// Keep track of the new GEP bases after splitting GEPs that have large offsets.
351 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
352
353 /// Map serial numbers to Large offset GEPs.
354 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
355
356  /// Keep track of promoted SExt instructions.
357 ValueToSExts ValToSExtendedUses;
358
359 /// True if the function has the OptSize attribute.
360 bool OptSize;
361
362 /// DataLayout for the Function being processed.
363 const DataLayout *DL = nullptr;
364
365 /// Building the dominator tree can be expensive, so we only build it
366 /// lazily and update it when required.
367 std::unique_ptr<DominatorTree> DT;
368
369public:
370  CodeGenPrepare() {}
371  CodeGenPrepare(const TargetMachine *TM) : TM(TM) {}
372  /// If we encounter a huge function, we need to limit the build time.
373 bool IsHugeFunc = false;
374
375  /// FreshBBs is like a worklist: it collects the updated BBs that need
376  /// to be optimized again.
377  /// Note: To bound the build time of this pass, whenever a BB is updated in
378  /// a huge function, it must be inserted into FreshBBs.
380
381 void releaseMemory() {
382 // Clear per function information.
383 InsertedInsts.clear();
384 PromotedInsts.clear();
385 FreshBBs.clear();
386 BPI.reset();
387 BFI.reset();
388 }
389
391
392private:
393 template <typename F>
394 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
395 // Substituting can cause recursive simplifications, which can invalidate
396 // our iterator. Use a WeakTrackingVH to hold onto it in case this
397 // happens.
398 Value *CurValue = &*CurInstIterator;
399 WeakTrackingVH IterHandle(CurValue);
400
401 f();
402
403 // If the iterator instruction was recursively deleted, start over at the
404 // start of the block.
405 if (IterHandle != CurValue) {
406 CurInstIterator = BB->begin();
407 SunkAddrs.clear();
408 }
409 }
410
411  // Get the DominatorTree, building it if necessary.
412 DominatorTree &getDT(Function &F) {
413 if (!DT)
414 DT = std::make_unique<DominatorTree>(F);
415 return *DT;
416 }
417
418 void removeAllAssertingVHReferences(Value *V);
419 bool eliminateAssumptions(Function &F);
420 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
421 bool eliminateMostlyEmptyBlocks(Function &F);
422 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
423 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
424 void eliminateMostlyEmptyBlock(BasicBlock *BB);
425 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
426 bool isPreheader);
427 bool makeBitReverse(Instruction &I);
428 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
429 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
430 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
431 unsigned AddrSpace);
432 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
433 bool optimizeInlineAsmInst(CallInst *CS);
434 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
435 bool optimizeExt(Instruction *&I);
436 bool optimizeExtUses(Instruction *I);
437 bool optimizeLoadExt(LoadInst *Load);
438 bool optimizeShiftInst(BinaryOperator *BO);
439 bool optimizeFunnelShift(IntrinsicInst *Fsh);
440 bool optimizeSelectInst(SelectInst *SI);
441 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
442 bool optimizeSwitchType(SwitchInst *SI);
443 bool optimizeSwitchPhiConstants(SwitchInst *SI);
444 bool optimizeSwitchInst(SwitchInst *SI);
445 bool optimizeExtractElementInst(Instruction *Inst);
446 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
447 bool fixupDbgValue(Instruction *I);
448 bool fixupDbgVariableRecord(DbgVariableRecord &I);
449 bool fixupDbgVariableRecordsOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
475 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 void verifyBFIUpdates(Function &F);
477 bool _run(Function &F);
478};
479
480class CodeGenPrepareLegacyPass : public FunctionPass {
481public:
482 static char ID; // Pass identification, replacement for typeid
483
484 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
486 }
487
488 bool runOnFunction(Function &F) override;
489
490 StringRef getPassName() const override { return "CodeGen Prepare"; }
491
492 void getAnalysisUsage(AnalysisUsage &AU) const override {
493 // FIXME: When we can selectively preserve passes, preserve the domtree.
500 }
501};
502
503} // end anonymous namespace
504
505char CodeGenPrepareLegacyPass::ID = 0;
506
507bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
508 if (skipFunction(F))
509 return false;
510 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
511 CodeGenPrepare CGP(TM);
512 CGP.DL = &F.getParent()->getDataLayout();
513 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
514 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
515 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
516 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
517 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
518 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
519 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
520 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
521 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
522 auto BBSPRWP =
523 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
524 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
525
526 return CGP._run(F);
527}
528
529INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
530 "Optimize for code generation", false, false)
537INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
538 "Optimize for code generation", false, false)
539
541 return new CodeGenPrepareLegacyPass();
542}
543
546 CodeGenPrepare CGP(TM);
547
548 bool Changed = CGP.run(F, AM);
549 if (!Changed)
550 return PreservedAnalyses::all();
551
556 return PA;
557}
558
559bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
560 DL = &F.getParent()->getDataLayout();
561 SubtargetInfo = TM->getSubtargetImpl(F);
562 TLI = SubtargetInfo->getTargetLowering();
563 TRI = SubtargetInfo->getRegisterInfo();
564 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
566 LI = &AM.getResult<LoopAnalysis>(F);
567 BPI.reset(new BranchProbabilityInfo(F, *LI));
568 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
569 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
570 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
571 BBSectionsProfileReader =
573 return _run(F);
574}
575
576bool CodeGenPrepare::_run(Function &F) {
577 bool EverMadeChange = false;
578
579 OptSize = F.hasOptSize();
580 // Use the basic-block-sections profile to promote hot functions to .text.hot
581 // if requested.
582 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
583 BBSectionsProfileReader->isFunctionHot(F.getName())) {
584 F.setSectionPrefix("hot");
585 } else if (ProfileGuidedSectionPrefix) {
586    // The hot attribute overrides profile-count-based hotness, while
587    // profile-count-based hotness overrides the cold attribute.
588    // This is conservative behavior.
589 if (F.hasFnAttribute(Attribute::Hot) ||
590 PSI->isFunctionHotInCallGraph(&F, *BFI))
591 F.setSectionPrefix("hot");
592    // If PSI shows this function is not hot, we place the function into the
593    // unlikely section if (1) PSI shows this is a cold function, or
594    // (2) the function has the cold attribute.
595 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
596 F.hasFnAttribute(Attribute::Cold))
597 F.setSectionPrefix("unlikely");
598 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
599 PSI->isFunctionHotnessUnknown(F))
600 F.setSectionPrefix("unknown");
601 }
602
603 /// This optimization identifies DIV instructions that can be
604 /// profitably bypassed and carried out with a shorter, faster divide.
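  /// For example (illustrative IR; the exact shape depends on the bypass
  /// widths the target reports), on a target where 64-bit division is slow
  /// but 32-bit division is fast,
  ///
  ///   %res = udiv i64 %a, %b
  ///
  /// is rewritten into a runtime check that both operands fit in 32 bits, a
  /// fast 32-bit udiv on that path, the original 64-bit udiv on the slow
  /// path, and a phi joining the two results.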
605 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
606 const DenseMap<unsigned int, unsigned int> &BypassWidths =
608 BasicBlock *BB = &*F.begin();
609 while (BB != nullptr) {
610 // bypassSlowDivision may create new BBs, but we don't want to reapply the
611 // optimization to those blocks.
612 BasicBlock *Next = BB->getNextNode();
613 // F.hasOptSize is already checked in the outer if statement.
614 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
615 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
616 BB = Next;
617 }
618 }
619
620 // Get rid of @llvm.assume builtins before attempting to eliminate empty
621 // blocks, since there might be blocks that only contain @llvm.assume calls
622 // (plus arguments that we can get rid of).
623 EverMadeChange |= eliminateAssumptions(F);
624
625 // Eliminate blocks that contain only PHI nodes and an
626 // unconditional branch.
627 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
628
629 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
632
633 // Split some critical edges where one of the sources is an indirect branch,
634 // to help generate sane code for PHIs involving such edges.
635 EverMadeChange |=
636 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
637
638  // If we are optimizing a huge function, we need to consider the build time,
639  // because the basic algorithm's complexity is near O(N!).
640 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
641
642 // Transformations above may invalidate dominator tree and/or loop info.
643 DT.reset();
644 LI->releaseMemory();
645 LI->analyze(getDT(F));
646
647 bool MadeChange = true;
648 bool FuncIterated = false;
649 while (MadeChange) {
650 MadeChange = false;
651
653 if (FuncIterated && !FreshBBs.contains(&BB))
654 continue;
655
656 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
657 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
658
659 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
660 DT.reset();
661
662 MadeChange |= Changed;
663 if (IsHugeFunc) {
664      // If the BB is updated, it may still have a chance to be optimized.
665      // This usually happens with sinking optimizations.
666 // For example:
667 //
668 // bb0:
669 // %and = and i32 %a, 4
670 // %cmp = icmp eq i32 %and, 0
671 //
672      // If %cmp is sunk to another BB, %and will then have a chance to sink as well.
673 if (Changed)
674 FreshBBs.insert(&BB);
675 else if (FuncIterated)
676 FreshBBs.erase(&BB);
677 } else {
678 // For small/normal functions, we restart BB iteration if the dominator
679 // tree of the Function was changed.
680 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
681 break;
682 }
683 }
684    // We have iterated over all the BBs in the function (the flag only matters for huge functions).
685 FuncIterated = IsHugeFunc;
686
687 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
688 MadeChange |= mergeSExts(F);
689 if (!LargeOffsetGEPMap.empty())
690 MadeChange |= splitLargeGEPOffsets();
691 MadeChange |= optimizePhiTypes(F);
692
693 if (MadeChange)
694 eliminateFallThrough(F, DT.get());
695
696#ifndef NDEBUG
697 if (MadeChange && VerifyLoopInfo)
698 LI->verify(getDT(F));
699#endif
700
701 // Really free removed instructions during promotion.
702 for (Instruction *I : RemovedInsts)
703 I->deleteValue();
704
705 EverMadeChange |= MadeChange;
706 SeenChainsForSExt.clear();
707 ValToSExtendedUses.clear();
708 RemovedInsts.clear();
709 LargeOffsetGEPMap.clear();
710 LargeOffsetGEPID.clear();
711 }
712
713 NewGEPBases.clear();
714 SunkAddrs.clear();
715
716 if (!DisableBranchOpts) {
717 MadeChange = false;
718 // Use a set vector to get deterministic iteration order. The order the
719 // blocks are removed may affect whether or not PHI nodes in successors
720 // are removed.
722 for (BasicBlock &BB : F) {
724 MadeChange |= ConstantFoldTerminator(&BB, true);
725 if (!MadeChange)
726 continue;
727
728 for (BasicBlock *Succ : Successors)
729 if (pred_empty(Succ))
730 WorkList.insert(Succ);
731 }
732
733 // Delete the dead blocks and any of their dead successors.
734 MadeChange |= !WorkList.empty();
735 while (!WorkList.empty()) {
736 BasicBlock *BB = WorkList.pop_back_val();
738
739 DeleteDeadBlock(BB);
740
741 for (BasicBlock *Succ : Successors)
742 if (pred_empty(Succ))
743 WorkList.insert(Succ);
744 }
745
746 // Merge pairs of basic blocks with unconditional branches, connected by
747 // a single edge.
748 if (EverMadeChange || MadeChange)
749 MadeChange |= eliminateFallThrough(F);
750
751 EverMadeChange |= MadeChange;
752 }
753
754 if (!DisableGCOpts) {
756 for (BasicBlock &BB : F)
757 for (Instruction &I : BB)
758 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
759 Statepoints.push_back(SP);
760 for (auto &I : Statepoints)
761 EverMadeChange |= simplifyOffsetableRelocate(*I);
762 }
763
764 // Do this last to clean up use-before-def scenarios introduced by other
765 // preparatory transforms.
766 EverMadeChange |= placeDbgValues(F);
767 EverMadeChange |= placePseudoProbes(F);
768
769#ifndef NDEBUG
771 verifyBFIUpdates(F);
772#endif
773
774 return EverMadeChange;
775}
776
777bool CodeGenPrepare::eliminateAssumptions(Function &F) {
778 bool MadeChange = false;
779 for (BasicBlock &BB : F) {
780 CurInstIterator = BB.begin();
781 while (CurInstIterator != BB.end()) {
782 Instruction *I = &*(CurInstIterator++);
783 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
784 MadeChange = true;
785 Value *Operand = Assume->getOperand(0);
786 Assume->eraseFromParent();
787
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
789 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
790 });
791 }
792 }
793 }
794 return MadeChange;
795}
796
797/// An instruction is about to be deleted, so remove all references to it in our
798/// GEP-tracking data structures.
799void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.erase(V);
801 NewGEPBases.erase(V);
802
803 auto GEP = dyn_cast<GetElementPtrInst>(V);
804 if (!GEP)
805 return;
806
807 LargeOffsetGEPID.erase(GEP);
808
809 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
810 if (VecI == LargeOffsetGEPMap.end())
811 return;
812
813 auto &GEPVector = VecI->second;
814 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
815
816 if (GEPVector.empty())
817 LargeOffsetGEPMap.erase(VecI);
818}
819
820// Verify BFI has been updated correctly by recomputing BFI and comparing them.
821void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
822 DominatorTree NewDT(F);
823 LoopInfo NewLI(NewDT);
824 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
825 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
826 NewBFI.verifyMatch(*BFI);
827}
828
829/// Merge basic blocks which are connected by a single edge, where one of the
830/// basic blocks has a single successor pointing to the other basic block,
831/// which has a single predecessor.
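/// For example (illustrative CFG), if %bb1 ends in "br label %bb2" and %bb2
/// has %bb1 as its only predecessor, %bb2 is merged into %bb1 and the
/// now-redundant fallthrough edge disappears.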
832bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
833 bool Changed = false;
834 // Scan all of the blocks in the function, except for the entry block.
835 // Use a temporary array to avoid iterator being invalidated when
836 // deleting blocks.
838 for (auto &Block : llvm::drop_begin(F))
839 Blocks.push_back(&Block);
840
842 for (auto &Block : Blocks) {
843 auto *BB = cast_or_null<BasicBlock>(Block);
844 if (!BB)
845 continue;
846 // If the destination block has a single pred, then this is a trivial
847 // edge, just collapse it.
848 BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850 // Don't merge if BB's address is taken.
851 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852 continue;
853
854 // Make an effort to skip unreachable blocks.
855 if (DT && !DT->isReachableFromEntry(BB))
856 continue;
857
858 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859 if (Term && !Term->isConditional()) {
860 Changed = true;
861 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
862
863 // Merge BB into SinglePred and delete it.
864 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
865 /* MemDep */ nullptr,
866 /* PredecessorWithTwoSuccessors */ false, DT);
867 Preds.insert(SinglePred);
868
869 if (IsHugeFunc) {
870 // Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877 // (Repeatedly) merging blocks into their predecessors can create redundant
878 // debug intrinsics.
879 for (const auto &Pred : Preds)
880 if (auto *BB = cast_or_null<BasicBlock>(Pred))
882
883 return Changed;
884}
885
886/// Find a destination block from BB if BB is a mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888 // If this block doesn't end with an uncond branch, ignore it.
889 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890 if (!BI || !BI->isUnconditional())
891 return nullptr;
892
893 // If the instruction before the branch (skipping debug info) isn't a phi
894 // node, then other stuff is happening here.
896 if (BBI != BB->begin()) {
897 --BBI;
898 while (isa<DbgInfoIntrinsic>(BBI)) {
899 if (BBI == BB->begin())
900 break;
901 --BBI;
902 }
903 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
904 return nullptr;
905 }
906
907 // Do not break infinite loops.
908 BasicBlock *DestBB = BI->getSuccessor(0);
909 if (DestBB == BB)
910 return nullptr;
911
912 if (!canMergeBlocks(BB, DestBB))
913 DestBB = nullptr;
914
915 return DestBB;
916}
917
918/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
919/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
920/// edges in ways that are non-optimal for isel. Start by eliminating these
921/// blocks so we can split them the way we want them.
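/// For example (illustrative IR), a block such as
///
///   split.bb:                          ; preds = %a, %b
///     %p = phi i32 [ %x, %a ], [ %y, %b ]
///     br label %dest
///
/// contains only a PHI and an unconditional branch; when merging is
/// profitable, it is folded into %dest and its incoming values are merged
/// into %dest's PHIs.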
922bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
924 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
925 while (!LoopList.empty()) {
926 Loop *L = LoopList.pop_back_val();
927 llvm::append_range(LoopList, *L);
928 if (BasicBlock *Preheader = L->getLoopPreheader())
929 Preheaders.insert(Preheader);
930 }
931
932 bool MadeChange = false;
933 // Copy blocks into a temporary array to avoid iterator invalidation issues
934 // as we remove them.
935 // Note that this intentionally skips the entry block.
937 for (auto &Block : llvm::drop_begin(F)) {
938 // Delete phi nodes that could block deleting other empty blocks.
940 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
941 Blocks.push_back(&Block);
942 }
943
944 for (auto &Block : Blocks) {
945 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
946 if (!BB)
947 continue;
948 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
949 if (!DestBB ||
950 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
951 continue;
952
953 eliminateMostlyEmptyBlock(BB);
954 MadeChange = true;
955 }
956 return MadeChange;
957}
958
959bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
960 BasicBlock *DestBB,
961 bool isPreheader) {
962 // Do not delete loop preheaders if doing so would create a critical edge.
963 // Loop preheaders can be good locations to spill registers. If the
964 // preheader is deleted and we create a critical edge, registers may be
965 // spilled in the loop body instead.
966 if (!DisablePreheaderProtect && isPreheader &&
967 !(BB->getSinglePredecessor() &&
969 return false;
970
971 // Skip merging if the block's successor is also a successor to any callbr
972 // that leads to this block.
973 // FIXME: Is this really needed? Is this a correctness issue?
974 for (BasicBlock *Pred : predecessors(BB)) {
975 if (isa<CallBrInst>(Pred->getTerminator()) &&
976 llvm::is_contained(successors(Pred), DestBB))
977 return false;
978 }
979
980 // Try to skip merging if the unique predecessor of BB is terminated by a
981 // switch or indirect branch instruction, and BB is used as an incoming block
982  // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
983  // add COPY instructions in the predecessor of BB instead of BB (if it is not
984  // merged). Note that the critical edge created by merging such blocks won't be
985 // split in MachineSink because the jump table is not analyzable. By keeping
986 // such empty block (BB), ISel will place COPY instructions in BB, not in the
987 // predecessor of BB.
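  // For example (illustrative IR):
  //
  //   pred:                        ; terminated by a switch (jump table)
  //     switch i32 %x, label %other [ i32 0, label %bb ]
  //   bb:                          ; empty block we may want to keep
  //     br label %dest
  //   dest:
  //     %p = phi i32 [ %v0, %bb ], [ %v1, %other ]
  //
  // If %bb were merged into %dest, the COPY for %v0 would have to be emitted
  // in %pred (on every switch path); by keeping %bb, ISel places it in %bb.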
988 BasicBlock *Pred = BB->getUniquePredecessor();
989 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
990 isa<IndirectBrInst>(Pred->getTerminator())))
991 return true;
992
993 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
994 return true;
995
996  // We use a simple cost heuristic which determines that skipping merging is
997 // profitable if the cost of skipping merging is less than the cost of
998 // merging : Cost(skipping merging) < Cost(merging BB), where the
999 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1000 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
1001 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1002 // Freq(Pred) / Freq(BB) > 2.
1003 // Note that if there are multiple empty blocks sharing the same incoming
1004 // value for the PHIs in the DestBB, we consider them together. In such
1005 // case, Cost(merging BB) will be the sum of their frequencies.
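  // Worked example (illustrative numbers): if Freq(Pred) = 600 and
  // Freq(BB) = 200, then Cost(skipping merging) = 200 * (Cost(Copy) +
  // Cost(Branch)) and Cost(merging BB) = 600 * Cost(Copy). With
  // Cost(Copy) == Cost(Branch) this is 400 vs. 600 units, i.e.
  // Freq(Pred) / Freq(BB) = 3 > 2, so skipping the merge is cheaper.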
1006
1007 if (!isa<PHINode>(DestBB->begin()))
1008 return true;
1009
1010 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1011
1012 // Find all other incoming blocks from which incoming values of all PHIs in
1013 // DestBB are the same as the ones from BB.
1014 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1015 if (DestBBPred == BB)
1016 continue;
1017
1018 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1019 return DestPN.getIncomingValueForBlock(BB) ==
1020 DestPN.getIncomingValueForBlock(DestBBPred);
1021 }))
1022 SameIncomingValueBBs.insert(DestBBPred);
1023 }
1024
1025  // See if all of BB's incoming values are the same as the value from Pred. In this
1026  // case, there is no reason to skip merging because COPYs are expected to be placed in
1027 // Pred already.
1028 if (SameIncomingValueBBs.count(Pred))
1029 return true;
1030
1031 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1032 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1033
1034 for (auto *SameValueBB : SameIncomingValueBBs)
1035 if (SameValueBB->getUniquePredecessor() == Pred &&
1036 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1037 BBFreq += BFI->getBlockFreq(SameValueBB);
1038
1039 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1040 return !Limit || PredFreq <= *Limit;
1041}
1042
1043/// Return true if we can merge BB into DestBB if there is a single
1044/// unconditional branch between them, and BB contains no other non-phi
1045/// instructions.
1046bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1047 const BasicBlock *DestBB) const {
1048 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1049  // the successor. If there are more complex conditions (e.g. preheaders),
1050 // don't mess around with them.
1051 for (const PHINode &PN : BB->phis()) {
1052 for (const User *U : PN.users()) {
1053 const Instruction *UI = cast<Instruction>(U);
1054 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1055 return false;
1056 // If User is inside DestBB block and it is a PHINode then check
1057 // incoming value. If incoming value is not from BB then this is
1058 // a complex condition (e.g. preheaders) we want to avoid here.
1059 if (UI->getParent() == DestBB) {
1060 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1061 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1062 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1063 if (Insn && Insn->getParent() == BB &&
1064 Insn->getParent() != UPN->getIncomingBlock(I))
1065 return false;
1066 }
1067 }
1068 }
1069 }
1070
1071 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1072 // and DestBB may have conflicting incoming values for the block. If so, we
1073 // can't merge the block.
1074 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1075 if (!DestBBPN)
1076 return true; // no conflict.
1077
1078 // Collect the preds of BB.
1080 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1081 // It is faster to get preds from a PHI than with pred_iterator.
1082 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1083 BBPreds.insert(BBPN->getIncomingBlock(i));
1084 } else {
1085 BBPreds.insert(pred_begin(BB), pred_end(BB));
1086 }
1087
1088 // Walk the preds of DestBB.
1089 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1090 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1091 if (BBPreds.count(Pred)) { // Common predecessor?
1092 for (const PHINode &PN : DestBB->phis()) {
1093 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1094 const Value *V2 = PN.getIncomingValueForBlock(BB);
1095
1096 // If V2 is a phi node in BB, look up what the mapped value will be.
1097 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1098 if (V2PN->getParent() == BB)
1099 V2 = V2PN->getIncomingValueForBlock(Pred);
1100
1101 // If there is a conflict, bail out.
1102 if (V1 != V2)
1103 return false;
1104 }
1105 }
1106 }
1107
1108 return true;
1109}
1110
1111/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1112static void replaceAllUsesWith(Value *Old, Value *New,
1114 bool IsHuge) {
1115 auto *OldI = dyn_cast<Instruction>(Old);
1116 if (OldI) {
1117 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1118 UI != E; ++UI) {
1119 Instruction *User = cast<Instruction>(*UI);
1120 if (IsHuge)
1121 FreshBBs.insert(User->getParent());
1122 }
1123 }
1124 Old->replaceAllUsesWith(New);
1125}
1126
1127/// Eliminate a basic block that has only phi's and an unconditional branch in
1128/// it.
1129void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1130 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1131 BasicBlock *DestBB = BI->getSuccessor(0);
1132
1133 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1134 << *BB << *DestBB);
1135
1136 // If the destination block has a single pred, then this is a trivial edge,
1137 // just collapse it.
1138 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1139 if (SinglePred != DestBB) {
1140 assert(SinglePred == BB &&
1141 "Single predecessor not the same as predecessor");
1142 // Merge DestBB into SinglePred/BB and delete it.
1144 // Note: BB(=SinglePred) will not be deleted on this path.
1145 // DestBB(=its single successor) is the one that was deleted.
1146 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1147
1148 if (IsHugeFunc) {
1149 // Update FreshBBs to optimize the merged BB.
1150 FreshBBs.insert(SinglePred);
1151 FreshBBs.erase(DestBB);
1152 }
1153 return;
1154 }
1155 }
1156
1157 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1158 // to handle the new incoming edges it is about to have.
1159 for (PHINode &PN : DestBB->phis()) {
1160 // Remove the incoming value for BB, and remember it.
1161 Value *InVal = PN.removeIncomingValue(BB, false);
1162
1163 // Two options: either the InVal is a phi node defined in BB or it is some
1164 // value that dominates BB.
1165 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1166 if (InValPhi && InValPhi->getParent() == BB) {
1167 // Add all of the input values of the input PHI as inputs of this phi.
1168 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1169 PN.addIncoming(InValPhi->getIncomingValue(i),
1170 InValPhi->getIncomingBlock(i));
1171 } else {
1172 // Otherwise, add one instance of the dominating value for each edge that
1173 // we will be adding.
1174 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1175 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1176 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1177 } else {
1178 for (BasicBlock *Pred : predecessors(BB))
1179 PN.addIncoming(InVal, Pred);
1180 }
1181 }
1182 }
1183
1184 // The PHIs are now updated, change everything that refers to BB to use
1185 // DestBB and remove BB.
1186 BB->replaceAllUsesWith(DestBB);
1187 BB->eraseFromParent();
1188 ++NumBlocksElim;
1189
1190 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1191}
1192
1193// Computes a map of base pointer relocation instructions to corresponding
1194// derived pointer relocation instructions given a vector of all relocate calls
1196 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1198 &RelocateInstMap) {
1199 // Collect information in two maps: one primarily for locating the base object
1200 // while filling the second map; the second map is the final structure holding
1201 // a mapping between Base and corresponding Derived relocate calls
1203 for (auto *ThisRelocate : AllRelocateCalls) {
1204 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1205 ThisRelocate->getDerivedPtrIndex());
1206 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1207 }
1208 for (auto &Item : RelocateIdxMap) {
1209 std::pair<unsigned, unsigned> Key = Item.first;
1210 if (Key.first == Key.second)
1211 // Base relocation: nothing to insert
1212 continue;
1213
1214 GCRelocateInst *I = Item.second;
1215 auto BaseKey = std::make_pair(Key.first, Key.first);
1216
1217 // We're iterating over RelocateIdxMap so we cannot modify it.
1218 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1219 if (MaybeBase == RelocateIdxMap.end())
1220 // TODO: We might want to insert a new base object relocate and gep off
1221 // that, if there are enough derived object relocates.
1222 continue;
1223
1224 RelocateInstMap[MaybeBase->second].push_back(I);
1225 }
1226}
1227
1228// Accepts a GEP and extracts the operands into a vector provided they're all
1229// small integer constants
1231 SmallVectorImpl<Value *> &OffsetV) {
1232 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1233 // Only accept small constant integer operands
1234 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1235 if (!Op || Op->getZExtValue() > 20)
1236 return false;
1237 }
1238
1239 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1240 OffsetV.push_back(GEP->getOperand(i));
1241 return true;
1242}
1243
1244// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1245// replace, computes a replacement, and applies it.
1246static bool
1248 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1249 bool MadeChange = false;
1250  // We must ensure that the relocation of a derived pointer is defined after
1251  // the relocation of its base pointer. If we find a relocation corresponding to
1252  // the base that is defined earlier than the relocation of the base, we move
1253  // the relocation of the base right before the found relocation. We only
1254  // consider relocations in the same basic block as the relocation of the base;
1255  // relocations from other basic blocks are skipped by this optimization.
1256 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1257 &*R != RelocatedBase; ++R)
1258 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1259 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1260 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1261 RelocatedBase->moveBefore(RI);
1262 MadeChange = true;
1263 break;
1264 }
1265
1266 for (GCRelocateInst *ToReplace : Targets) {
1267 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1268 "Not relocating a derived object of the original base object");
1269 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1270 // A duplicate relocate call. TODO: coalesce duplicates.
1271 continue;
1272 }
1273
1274 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1275 // Base and derived relocates are in different basic blocks.
1276 // In this case transform is only valid when base dominates derived
1277 // relocate. However it would be too expensive to check dominance
1278 // for each such relocate, so we skip the whole transformation.
1279 continue;
1280 }
1281
1282 Value *Base = ToReplace->getBasePtr();
1283 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1284 if (!Derived || Derived->getPointerOperand() != Base)
1285 continue;
1286
1288 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1289 continue;
1290
1291 // Create a Builder and replace the target callsite with a gep
1292 assert(RelocatedBase->getNextNode() &&
1293 "Should always have one since it's not a terminator");
1294
1295 // Insert after RelocatedBase
1296 IRBuilder<> Builder(RelocatedBase->getNextNode());
1297 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1298
1299 // If gc_relocate does not match the actual type, cast it to the right type.
1300 // In theory, there must be a bitcast after gc_relocate if the type does not
1301 // match, and we should reuse it to get the derived pointer. But it could be
1302 // cases like this:
1303 // bb1:
1304 // ...
1305 // %g1 = call coldcc i8 addrspace(1)*
1306 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1307 //
1308 // bb2:
1309 // ...
1310 // %g2 = call coldcc i8 addrspace(1)*
1311 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1312 //
1313 // merge:
1314 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1315  //  %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1316 //
1317  // In this case, we cannot find the bitcast any more. So we insert a new
1318  // bitcast whether there is already one or not. In this way, we can handle
1319 // all cases, and the extra bitcast should be optimized away in later
1320 // passes.
1321 Value *ActualRelocatedBase = RelocatedBase;
1322 if (RelocatedBase->getType() != Base->getType()) {
1323 ActualRelocatedBase =
1324 Builder.CreateBitCast(RelocatedBase, Base->getType());
1325 }
1326 Value *Replacement =
1327 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1328 ArrayRef(OffsetV));
1329 Replacement->takeName(ToReplace);
1330 // If the newly generated derived pointer's type does not match the original
1331 // derived pointer's type, cast the new derived pointer to match it. Same
1332 // reasoning as above.
1333 Value *ActualReplacement = Replacement;
1334 if (Replacement->getType() != ToReplace->getType()) {
1335 ActualReplacement =
1336 Builder.CreateBitCast(Replacement, ToReplace->getType());
1337 }
1338 ToReplace->replaceAllUsesWith(ActualReplacement);
1339 ToReplace->eraseFromParent();
1340
1341 MadeChange = true;
1342 }
1343 return MadeChange;
1344}
1345
1346// Turns this:
1347//
1348// %base = ...
1349// %ptr = gep %base + 15
1350// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1351// %base' = relocate(%tok, i32 4, i32 4)
1352// %ptr' = relocate(%tok, i32 4, i32 5)
1353// %val = load %ptr'
1354//
1355// into this:
1356//
1357// %base = ...
1358// %ptr = gep %base + 15
1359// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1360// %base' = gc.relocate(%tok, i32 4, i32 4)
1361// %ptr' = gep %base' + 15
1362// %val = load %ptr'
1363bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1364 bool MadeChange = false;
1365 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1366 for (auto *U : I.users())
1367 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1368 // Collect all the relocate calls associated with a statepoint
1369 AllRelocateCalls.push_back(Relocate);
1370
1371 // We need at least one base pointer relocation + one derived pointer
1372 // relocation to mangle
1373 if (AllRelocateCalls.size() < 2)
1374 return false;
1375
1376 // RelocateInstMap is a mapping from the base relocate instruction to the
1377 // corresponding derived relocate instructions
1379 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1380 if (RelocateInstMap.empty())
1381 return false;
1382
1383 for (auto &Item : RelocateInstMap)
1384 // Item.first is the RelocatedBase to offset against
1385 // Item.second is the vector of Targets to replace
1386 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1387 return MadeChange;
1388}
1389
1390/// Sink the specified cast instruction into its user blocks.
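/// For example (illustrative IR), if "%c = zext i32 %x to i64" in block %def
/// is only used in blocks %u1 and %u2, a clone of the cast is inserted into
/// each of %u1 and %u2, the uses are rewritten to the local clones, and the
/// original cast is erased once it has no uses left.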
1391static bool SinkCast(CastInst *CI) {
1392 BasicBlock *DefBB = CI->getParent();
1393
1394 /// InsertedCasts - Only insert a cast in each block once.
1396
1397 bool MadeChange = false;
1398 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1399 UI != E;) {
1400 Use &TheUse = UI.getUse();
1401 Instruction *User = cast<Instruction>(*UI);
1402
1403 // Figure out which BB this cast is used in. For PHI's this is the
1404 // appropriate predecessor block.
1405 BasicBlock *UserBB = User->getParent();
1406 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1407 UserBB = PN->getIncomingBlock(TheUse);
1408 }
1409
1410 // Preincrement use iterator so we don't invalidate it.
1411 ++UI;
1412
1413 // The first insertion point of a block containing an EH pad is after the
1414 // pad. If the pad is the user, we cannot sink the cast past the pad.
1415 if (User->isEHPad())
1416 continue;
1417
1418 // If the block selected to receive the cast is an EH pad that does not
1419 // allow non-PHI instructions before the terminator, we can't sink the
1420 // cast.
1421 if (UserBB->getTerminator()->isEHPad())
1422 continue;
1423
1424 // If this user is in the same block as the cast, don't change the cast.
1425 if (UserBB == DefBB)
1426 continue;
1427
1428 // If we have already inserted a cast into this block, use it.
1429 CastInst *&InsertedCast = InsertedCasts[UserBB];
1430
1431 if (!InsertedCast) {
1432 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1433 assert(InsertPt != UserBB->end());
1434 InsertedCast = cast<CastInst>(CI->clone());
1435 InsertedCast->insertBefore(*UserBB, InsertPt);
1436 }
1437
1438 // Replace a use of the cast with a use of the new cast.
1439 TheUse = InsertedCast;
1440 MadeChange = true;
1441 ++NumCastUses;
1442 }
1443
1444 // If we removed all uses, nuke the cast.
1445 if (CI->use_empty()) {
1446 salvageDebugInfo(*CI);
1447 CI->eraseFromParent();
1448 MadeChange = true;
1449 }
1450
1451 return MadeChange;
1452}
1453
1454/// If the specified cast instruction is a noop copy (e.g. it's casting from
1455/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1456/// reduce the number of virtual registers that must be created and coalesced.
1457///
1458/// Return true if any changes are made.
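/// For example (following the i32->i8 case above): on PPC an i8 is promoted
/// to i32, so after promotion a "trunc i32 %x to i8" has the same promoted
/// source and destination type and is a noop copy; SinkCast then duplicates
/// it into the using blocks instead of keeping one value live across blocks.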
1460 const DataLayout &DL) {
1461 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1462 // than sinking only nop casts, but is helpful on some platforms.
1463 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1464 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1465 ASC->getDestAddressSpace()))
1466 return false;
1467 }
1468
1469 // If this is a noop copy,
1470 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1471 EVT DstVT = TLI.getValueType(DL, CI->getType());
1472
1473  // Is this an fp<->int conversion?
1474 if (SrcVT.isInteger() != DstVT.isInteger())
1475 return false;
1476
1477 // If this is an extension, it will be a zero or sign extension, which
1478 // isn't a noop.
1479 if (SrcVT.bitsLT(DstVT))
1480 return false;
1481
1482 // If these values will be promoted, find out what they will be promoted
1483 // to. This helps us consider truncates on PPC as noop copies when they
1484 // are.
1485 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1487 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1488 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1490 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1491
1492 // If, after promotion, these are the same types, this is a noop copy.
1493 if (SrcVT != DstVT)
1494 return false;
1495
1496 return SinkCast(CI);
1497}
1498
1499// Match a simple increment by constant operation. Note that if a sub is
1500// matched, the step is negated (as if the step had been canonicalized to
1501// an add, even though we leave the instruction alone.)
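// For example (illustrative IR): "%iv.next = add i32 %iv, 4" matches with
// LHS = %iv and Step = 4, while "%iv.next = sub i32 %iv, 4" matches with
// LHS = %iv and Step = -4 (the step is negated because the sub is treated as
// its canonical add form).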
1502bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1503 Constant *&Step) {
1504 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1505 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1506 m_Instruction(LHS), m_Constant(Step)))))
1507 return true;
1508 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1509 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1510 m_Instruction(LHS), m_Constant(Step))))) {
1511 Step = ConstantExpr::getNeg(Step);
1512 return true;
1513 }
1514 return false;
1515}
1516
1517/// If the given \p PN is an induction variable with value IVInc coming from the
1518/// backedge, and on each iteration it gets increased by Step, return pair
1519/// <IVInc, Step>. Otherwise, return std::nullopt.
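/// For example (illustrative IR), for a single-block loop such as
///
///   loop:
///     %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
///     %iv.next = add i32 %iv, 1
///     ...
///     br i1 %cond, label %loop, label %exit
///
/// getIVIncrement(%iv, LI) returns the pair <%iv.next, i32 1>.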
1520static std::optional<std::pair<Instruction *, Constant *>>
1521getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1522 const Loop *L = LI->getLoopFor(PN->getParent());
1523 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1524 return std::nullopt;
1525 auto *IVInc =
1526 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1527 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1528 return std::nullopt;
1529 Instruction *LHS = nullptr;
1530 Constant *Step = nullptr;
1531 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1532 return std::make_pair(IVInc, Step);
1533 return std::nullopt;
1534}
1535
1536static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1537 auto *I = dyn_cast<Instruction>(V);
1538 if (!I)
1539 return false;
1540 Instruction *LHS = nullptr;
1541 Constant *Step = nullptr;
1542 if (!matchIncrement(I, LHS, Step))
1543 return false;
1544 if (auto *PN = dyn_cast<PHINode>(LHS))
1545 if (auto IVInc = getIVIncrement(PN, LI))
1546 return IVInc->first == I;
1547 return false;
1548}
1549
1550bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1551 Value *Arg0, Value *Arg1,
1552 CmpInst *Cmp,
1553 Intrinsic::ID IID) {
1554 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1555 if (!isIVIncrement(BO, LI))
1556 return false;
1557 const Loop *L = LI->getLoopFor(BO->getParent());
1558 assert(L && "L should not be null after isIVIncrement()");
1559    // Do not risk moving the increment into a child loop.
1560 if (LI->getLoopFor(Cmp->getParent()) != L)
1561 return false;
1562
1563 // Finally, we need to ensure that the insert point will dominate all
1564 // existing uses of the increment.
1565
1566 auto &DT = getDT(*BO->getParent()->getParent());
1567 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1568 // If we're moving up the dom tree, all uses are trivially dominated.
1569 // (This is the common case for code produced by LSR.)
1570 return true;
1571
1572 // Otherwise, special case the single use in the phi recurrence.
1573 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1574 };
1575 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1576 // We used to use a dominator tree here to allow multi-block optimization.
1577 // But that was problematic because:
1578 // 1. It could cause a perf regression by hoisting the math op into the
1579 // critical path.
1580 // 2. It could cause a perf regression by creating a value that was live
1581 // across multiple blocks and increasing register pressure.
1582 // 3. Use of a dominator tree could cause large compile-time regression.
1583 // This is because we recompute the DT on every change in the main CGP
1584 // run-loop. The recomputing is probably unnecessary in many cases, so if
1585 // that was fixed, using a DT here would be ok.
1586 //
1587 // There is one important particular case we still want to handle: if BO is
1588 // the IV increment. Important properties that make it profitable:
1589 // - We can speculate IV increment anywhere in the loop (as long as the
1590 // indvar Phi is its only user);
1591 // - Upon computing Cmp, we effectively compute something equivalent to the
1592 // IV increment (even though it looks different in the IR). So moving it up
1593 // to the cmp point does not really increase register pressure.
1594 return false;
1595 }
1596
1597 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1598 if (BO->getOpcode() == Instruction::Add &&
1599 IID == Intrinsic::usub_with_overflow) {
1600 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1601 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1602 }
1603
1604 // Insert at the first instruction of the pair.
1605 Instruction *InsertPt = nullptr;
1606 for (Instruction &Iter : *Cmp->getParent()) {
1607 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1608 // the overflow intrinsic are defined.
1609 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1610 InsertPt = &Iter;
1611 break;
1612 }
1613 }
1614 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1615
1616 IRBuilder<> Builder(InsertPt);
1617 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1618 if (BO->getOpcode() != Instruction::Xor) {
1619 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1620 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1621 } else
1622 assert(BO->hasOneUse() &&
1623 "Patterns with XOr should use the BO only in the compare");
1624 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1625 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1626 Cmp->eraseFromParent();
1627 BO->eraseFromParent();
1628 return true;
1629}
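// An illustrative before/after for this rewrite (a sketch, not lifted from a
// test; value names are hypothetical), using uadd.with.overflow:
//
//   %math = add i64 %a, %b
//   %ov   = icmp ult i64 %math, %a
// ==>
//   %mov  = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
//   %math = extractvalue { i64, i1 } %mov, 0
//   %ov   = extractvalue { i64, i1 } %mov, 1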
1630
1631/// Match special-case patterns that check for unsigned add overflow.
1632static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1633 BinaryOperator *&Add) {
1634 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1635 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1636 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1637
1638 // We are not expecting non-canonical/degenerate code. Just bail out.
1639 if (isa<Constant>(A))
1640 return false;
1641
1642 ICmpInst::Predicate Pred = Cmp->getPredicate();
1643 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1644 B = ConstantInt::get(B->getType(), 1);
1645 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1646 B = ConstantInt::get(B->getType(), -1);
1647 else
1648 return false;
1649
1650 // Check the users of the variable operand of the compare looking for an add
1651 // with the adjusted constant.
1652 for (User *U : A->users()) {
1653 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1654 Add = cast<BinaryOperator>(U);
1655 return true;
1656 }
1657 }
1658 return false;
1659}
1660
1661/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1662/// intrinsic. Return true if any changes were made.
1663bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1664 ModifyDT &ModifiedDT) {
1665 bool EdgeCase = false;
1666 Value *A, *B;
1667 BinaryOperator *Add;
1668 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1669 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1670 return false;
1671 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1672 A = Add->getOperand(0);
1673 B = Add->getOperand(1);
1674 EdgeCase = true;
1675 }
1676
1677 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1678 TLI->getValueType(*DL, Add->getType()),
1679 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1680 return false;
1681
1682 // We don't want to move around uses of condition values this late, so we
1683 // check if it is legal to create the call to the intrinsic in the basic
1684 // block containing the icmp.
1685 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1686 return false;
1687
1688 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1689 Intrinsic::uadd_with_overflow))
1690 return false;
1691
1692 // Reset callers - do not crash by iterating over a dead instruction.
1693 ModifiedDT = ModifyDT::ModifyInstDT;
1694 return true;
1695}
1696
1697bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1698 ModifyDT &ModifiedDT) {
1699 // We are not expecting non-canonical/degenerate code. Just bail out.
1700 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1701 if (isa<Constant>(A) && isa<Constant>(B))
1702 return false;
1703
1704 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1705 ICmpInst::Predicate Pred = Cmp->getPredicate();
1706 if (Pred == ICmpInst::ICMP_UGT) {
1707 std::swap(A, B);
1708 Pred = ICmpInst::ICMP_ULT;
1709 }
1710 // Convert special-case: (A == 0) is the same as (A u< 1).
1711 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1712 B = ConstantInt::get(B->getType(), 1);
1713 Pred = ICmpInst::ICMP_ULT;
1714 }
1715 // Convert special-case: (A != 0) is the same as (0 u< A).
1716 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1717 std::swap(A, B);
1718 Pred = ICmpInst::ICMP_ULT;
1719 }
1720 if (Pred != ICmpInst::ICMP_ULT)
1721 return false;
1722
1723 // Walk the users of a variable operand of a compare looking for a subtract or
1724 // add with that same operand. Also match the 2nd operand of the compare to
1725 // the add/sub, but that may be a negated constant operand of an add.
1726 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1727 BinaryOperator *Sub = nullptr;
1728 for (User *U : CmpVariableOperand->users()) {
1729 // A - B, A u< B --> usubo(A, B)
1730 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1731 Sub = cast<BinaryOperator>(U);
1732 break;
1733 }
1734
1735 // A + (-C), A u< C (canonicalized form of (sub A, C))
1736 const APInt *CmpC, *AddC;
1737 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1738 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1739 Sub = cast<BinaryOperator>(U);
1740 break;
1741 }
1742 }
1743 if (!Sub)
1744 return false;
1745
1746 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1747 TLI->getValueType(*DL, Sub->getType()),
1748 Sub->hasNUsesOrMore(1)))
1749 return false;
1750
1751 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1752 Cmp, Intrinsic::usub_with_overflow))
1753 return false;
1754
1755 // Reset callers - do not crash by iterating over a dead instruction.
1756 ModifiedDT = ModifyDT::ModifyInstDT;
1757 return true;
1758}
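// A sketch of the kind of input this handles (illustrative only):
//
//   %sub = sub i64 %a, %b
//   %cmp = icmp ult i64 %a, %b        ; borrow check
// ==>
//   %sov = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
//   %sub = extractvalue { i64, i1 } %sov, 0
//   %cmp = extractvalue { i64, i1 } %sov, 1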
1759
1760/// Sink the given CmpInst into user blocks to reduce the number of virtual
1761/// registers that must be created and coalesced. This is a clear win except on
1762/// targets with multiple condition code registers (PowerPC), where it might
1763/// lose; some adjustment may be wanted there.
1764///
1765/// Return true if any changes are made.
1766static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1767 if (TLI.hasMultipleConditionRegisters())
1768 return false;
1769
1770 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1771 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1772 return false;
1773
1774 // Only insert a cmp in each block once.
1775 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1776
1777 bool MadeChange = false;
1778 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1779 UI != E;) {
1780 Use &TheUse = UI.getUse();
1781 Instruction *User = cast<Instruction>(*UI);
1782
1783 // Preincrement use iterator so we don't invalidate it.
1784 ++UI;
1785
1786 // Don't bother for PHI nodes.
1787 if (isa<PHINode>(User))
1788 continue;
1789
1790 // Figure out which BB this cmp is used in.
1791 BasicBlock *UserBB = User->getParent();
1792 BasicBlock *DefBB = Cmp->getParent();
1793
1794 // If this user is in the same block as the cmp, don't change the cmp.
1795 if (UserBB == DefBB)
1796 continue;
1797
1798 // If we have already inserted a cmp into this block, use it.
1799 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1800
1801 if (!InsertedCmp) {
1802 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1803 assert(InsertPt != UserBB->end());
1804 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1805 Cmp->getOperand(0), Cmp->getOperand(1), "");
1806 InsertedCmp->insertBefore(*UserBB, InsertPt);
1807 // Propagate the debug info.
1808 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1809 }
1810
1811 // Replace a use of the cmp with a use of the new cmp.
1812 TheUse = InsertedCmp;
1813 MadeChange = true;
1814 ++NumCmpUses;
1815 }
1816
1817 // If we removed all uses, nuke the cmp.
1818 if (Cmp->use_empty()) {
1819 Cmp->eraseFromParent();
1820 MadeChange = true;
1821 }
1822
1823 return MadeChange;
1824}
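// Illustrative sketch (blocks and names are hypothetical): a cmp defined in
// %bb0 but only used in %bb1 is re-created in %bb1, so its i1 result no
// longer has to be kept live across the block boundary:
//
//   bb0:
//     %c = icmp eq i32 %x, %y
//     ...
//   bb1:
//     br i1 %c, label %t, label %f
// ==>
//   bb1:
//     %c.sunk = icmp eq i32 %x, %y
//     br i1 %c.sunk, label %t, label %f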
1825
1826/// For pattern like:
1827///
1828/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1829/// ...
1830/// DomBB:
1831/// ...
1832/// br DomCond, TrueBB, CmpBB
1833/// CmpBB: (with DomBB being the single predecessor)
1834/// ...
1835/// Cmp = icmp eq CmpOp0, CmpOp1
1836/// ...
1837///
1838/// This would use two comparisons on targets where the lowering of icmp sgt/slt
1839/// differs from the lowering of icmp eq (PowerPC). This function tries to convert
1840/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1841/// After that, DomCond and Cmp can share the same comparison, eliminating one
1842/// comparison.
1843///
1844/// Return true if any changes are made.
1845static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1846 const TargetLowering &TLI) {
1847 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1848 return false;
1849
1850 ICmpInst::Predicate Pred = Cmp->getPredicate();
1851 if (Pred != ICmpInst::ICMP_EQ)
1852 return false;
1853
1854 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1855 // icmp slt/sgt would introduce more redundant LLVM IR.
1856 for (User *U : Cmp->users()) {
1857 if (isa<BranchInst>(U))
1858 continue;
1859 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1860 continue;
1861 return false;
1862 }
1863
1864 // This is a cheap/incomplete check for dominance - just match a single
1865 // predecessor with a conditional branch.
1866 BasicBlock *CmpBB = Cmp->getParent();
1867 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1868 if (!DomBB)
1869 return false;
1870
1871 // We want to ensure that the only way control gets to the comparison of
1872 // interest is that a less/greater than comparison on the same operands is
1873 // false.
1874 Value *DomCond;
1875 BasicBlock *TrueBB, *FalseBB;
1876 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1877 return false;
1878 if (CmpBB != FalseBB)
1879 return false;
1880
1881 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1882 ICmpInst::Predicate DomPred;
1883 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1884 return false;
1885 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1886 return false;
1887
1888 // Convert the equality comparison to the opposite of the dominating
1889 // comparison and swap the direction for all branch/select users.
1890 // We have conceptually converted:
1891 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1892 // to
1893 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1894 // And similarly for branches.
1895 for (User *U : Cmp->users()) {
1896 if (auto *BI = dyn_cast<BranchInst>(U)) {
1897 assert(BI->isConditional() && "Must be conditional");
1898 BI->swapSuccessors();
1899 continue;
1900 }
1901 if (auto *SI = dyn_cast<SelectInst>(U)) {
1902 // Swap operands
1903 SI->swapValues();
1904 SI->swapProfMetadata();
1905 continue;
1906 }
1907 llvm_unreachable("Must be a branch or a select");
1908 }
1909 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1910 return true;
1911}
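// A compact sketch of the conversion (values and blocks are hypothetical):
//
//   %DomCond = icmp slt i32 %a, %b
//   DomBB:  br i1 %DomCond, label %TrueBB, label %CmpBB
//   CmpBB:  %Cmp = icmp eq i32 %a, %b
// ==>
//   CmpBB:  %Cmp = icmp sgt i32 %a, %b
//
// with the branch successors / select operands of %Cmp's users swapped, so
// both blocks can reuse the result of a single signed comparison.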
1912
1913/// Many architectures use the same instruction for both subtract and cmp. Try
1914/// to swap cmp operands to match subtract operations to allow for CSE.
1915static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1916 Value *Op0 = Cmp->getOperand(0);
1917 Value *Op1 = Cmp->getOperand(1);
1918 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1919 isa<Constant>(Op1) || Op0 == Op1)
1920 return false;
1921
1922 // If a subtract already has the same operands as a compare, swapping would be
1923 // bad. If a subtract has the same operands as a compare but in reverse order,
1924 // then swapping is good.
1925 int GoodToSwap = 0;
1926 unsigned NumInspected = 0;
1927 for (const User *U : Op0->users()) {
1928 // Avoid walking many users.
1929 if (++NumInspected > 128)
1930 return false;
1931 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
1932 GoodToSwap++;
1933 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
1934 GoodToSwap--;
1935 }
1936
1937 if (GoodToSwap > 0) {
1938 Cmp->swapOperands();
1939 return true;
1940 }
1941 return false;
1942}
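// Illustrative sketch: with a reversed-operand subtract in sight, swapping
// the compare's operands lets the cmp and the sub share one instruction on
// targets that implement both the same way:
//
//   %s = sub i32 %b, %a
//   %c = icmp ugt i32 %a, %b
// ==>
//   %s = sub i32 %b, %a
//   %c = icmp ult i32 %b, %a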
1943
1944static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1945 const DataLayout &DL) {
1946 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
1947 if (!FCmp)
1948 return false;
1949
1950 // Don't fold if the target offers free fabs and the predicate is legal.
1951 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
1952 if (TLI.isFAbsFree(VT) &&
1953 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
1954 VT.getSimpleVT()))
1955 return false;
1956
1957 // Reverse the canonicalization if it is an FP class test.
1958 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
1959 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
1960 };
1961 auto [ClassVal, ClassTest] =
1962 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
1963 FCmp->getOperand(0), FCmp->getOperand(1));
1964 if (!ClassVal)
1965 return false;
1966
1967 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
1968 return false;
1969
1970 IRBuilder<> Builder(Cmp);
1971 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
1972 Cmp->replaceAllUsesWith(IsFPClass);
1973 Cmp->eraseFromParent();
1974 return true;
1975}
1976
1977bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
1978 if (sinkCmpExpression(Cmp, *TLI))
1979 return true;
1980
1981 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1982 return true;
1983
1984 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1985 return true;
1986
1987 if (foldICmpWithDominatingICmp(Cmp, *TLI))
1988 return true;
1989
1990 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
1991 return true;
1992
1993 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
1994 return true;
1995
1996 return false;
1997}
1998
1999/// Duplicate and sink the given 'and' instruction into user blocks where it is
2000/// used in a compare to allow isel to generate better code for targets where
2001/// this operation can be combined.
2002///
2003/// Return true if any changes are made.
2004static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2005 SetOfInstrs &InsertedInsts) {
2006 // Double-check that we're not trying to optimize an instruction that was
2007 // already optimized by some other part of this pass.
2008 assert(!InsertedInsts.count(AndI) &&
2009 "Attempting to optimize already optimized and instruction");
2010 (void)InsertedInsts;
2011
2012 // Nothing to do for single use in same basic block.
2013 if (AndI->hasOneUse() &&
2014 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2015 return false;
2016
2017 // Try to avoid cases where sinking/duplicating is likely to increase register
2018 // pressure.
2019 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2020 !isa<ConstantInt>(AndI->getOperand(1)) &&
2021 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2022 return false;
2023
2024 for (auto *U : AndI->users()) {
2025 Instruction *User = cast<Instruction>(U);
2026
2027 // Only sink 'and' feeding icmp with 0.
2028 if (!isa<ICmpInst>(User))
2029 return false;
2030
2031 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2032 if (!CmpC || !CmpC->isZero())
2033 return false;
2034 }
2035
2036 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2037 return false;
2038
2039 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2040 LLVM_DEBUG(AndI->getParent()->dump());
2041
2042 // Push the 'and' into the same block as the icmp 0. There should only be
2043 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2044 // others, so we don't need to keep track of which BBs we insert into.
2045 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2046 UI != E;) {
2047 Use &TheUse = UI.getUse();
2048 Instruction *User = cast<Instruction>(*UI);
2049
2050 // Preincrement use iterator so we don't invalidate it.
2051 ++UI;
2052
2053 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2054
2055 // Keep the 'and' in the same place if the use is already in the same block.
2056 Instruction *InsertPt =
2057 User->getParent() == AndI->getParent() ? AndI : User;
2058 Instruction *InsertedAnd = BinaryOperator::Create(
2059 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2060 InsertPt->getIterator());
2061 // Propagate the debug info.
2062 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2063
2064 // Replace a use of the 'and' with a use of the new 'and'.
2065 TheUse = InsertedAnd;
2066 ++NumAndUses;
2067 LLVM_DEBUG(User->getParent()->dump());
2068 }
2069
2070 // We removed all uses, nuke the and.
2071 AndI->eraseFromParent();
2072 return true;
2073}
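// Illustrative sketch (blocks are hypothetical): the mask is duplicated next
// to each icmp-with-zero user so isel can fold it into a test/bit-test:
//
//   bb0:
//     %m = and i64 %x, 255
//   bb1:
//     %z = icmp eq i64 %m, 0
// ==>
//   bb1:
//     %m1 = and i64 %x, 255
//     %z  = icmp eq i64 %m1, 0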
2074
2075/// Check if the candidate use could be combined with a shift instruction.
2076/// The candidates are:
2077/// 1. A truncate instruction
2078/// 2. An 'and' instruction whose immediate is a mask of the low bits:
2079/// imm & (imm+1) == 0
2080static bool isExtractBitsCandidateUse(Instruction *User) {
2081 if (!isa<TruncInst>(User)) {
2082 if (User->getOpcode() != Instruction::And ||
2083 !isa<ConstantInt>(User->getOperand(1)))
2084 return false;
2085
2086 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2087
2088 if ((Cimm & (Cimm + 1)).getBoolValue())
2089 return false;
2090 }
2091 return true;
2092}
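// A quick worked example of the mask check above: 0x00ff & (0x00ff + 1) ==
// 0x00ff & 0x0100 == 0, so 0x00ff qualifies as a low-bit mask, whereas
// 0x00f0 & (0x00f0 + 1) == 0x00f0 != 0, so 0x00f0 does not.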
2093
2094/// Sink both the shift and the truncate instruction into the BB of the truncate's use.
2095static bool
2096SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2097 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2098 const TargetLowering &TLI, const DataLayout &DL) {
2099 BasicBlock *UserBB = User->getParent();
2100 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2101 auto *TruncI = cast<TruncInst>(User);
2102 bool MadeChange = false;
2103
2104 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2105 TruncE = TruncI->user_end();
2106 TruncUI != TruncE;) {
2107
2108 Use &TruncTheUse = TruncUI.getUse();
2109 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2110 // Preincrement use iterator so we don't invalidate it.
2111
2112 ++TruncUI;
2113
2114 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2115 if (!ISDOpcode)
2116 continue;
2117
2118 // If the use is actually a legal node, there will not be an
2119 // implicit truncate.
2120 // FIXME: always querying the result type is just an
2121 // approximation; some nodes' legality is determined by the
2122 // operand or other means. There's no good way to find out though.
2123 if (TLI.isOperationLegalOrCustom(
2124 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2125 continue;
2126
2127 // Don't bother for PHI nodes.
2128 if (isa<PHINode>(TruncUser))
2129 continue;
2130
2131 BasicBlock *TruncUserBB = TruncUser->getParent();
2132
2133 if (UserBB == TruncUserBB)
2134 continue;
2135
2136 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2137 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2138
2139 if (!InsertedShift && !InsertedTrunc) {
2140 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2141 assert(InsertPt != TruncUserBB->end());
2142 // Sink the shift
2143 if (ShiftI->getOpcode() == Instruction::AShr)
2144 InsertedShift =
2145 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2146 else
2147 InsertedShift =
2148 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2149 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2150 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2151
2152 // Sink the trunc
2153 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2154 TruncInsertPt++;
2155 // It will go ahead of any debug-info.
2156 TruncInsertPt.setHeadBit(true);
2157 assert(TruncInsertPt != TruncUserBB->end());
2158
2159 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2160 TruncI->getType(), "");
2161 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2162 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2163
2164 MadeChange = true;
2165
2166 TruncTheUse = InsertedTrunc;
2167 }
2168 }
2169 return MadeChange;
2170}
2171
2172/// Sink the shift *right* instruction into user blocks if the uses could
2173/// potentially be combined with this shift instruction to generate a BitExtract
2174/// instruction. It will only be applied if the architecture supports the
2175/// BitExtract instruction. Here is an example:
2176/// BB1:
2177/// %x.extract.shift = lshr i64 %arg1, 32
2178/// BB2:
2179/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2180/// ==>
2181///
2182/// BB2:
2183/// %x.extract.shift.1 = lshr i64 %arg1, 32
2184/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2185///
2186/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2187/// instruction.
2188/// Return true if any changes are made.
2189static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2190 const TargetLowering &TLI,
2191 const DataLayout &DL) {
2192 BasicBlock *DefBB = ShiftI->getParent();
2193
2194 /// Only insert instructions in each block once.
2195 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2196
2197 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2198
2199 bool MadeChange = false;
2200 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2201 UI != E;) {
2202 Use &TheUse = UI.getUse();
2203 Instruction *User = cast<Instruction>(*UI);
2204 // Preincrement use iterator so we don't invalidate it.
2205 ++UI;
2206
2207 // Don't bother for PHI nodes.
2208 if (isa<PHINode>(User))
2209 continue;
2210
2211 if (!isExtractBitsCandidateUse(User))
2212 continue;
2213
2214 BasicBlock *UserBB = User->getParent();
2215
2216 if (UserBB == DefBB) {
2217 // If the shift and truncate instructions are in the same BB, the use of
2218 // the truncate (TruncUse) may still introduce another truncate if it is
2219 // not legal. In this case, we would like to sink both the shift and the
2220 // truncate instruction to the BB of TruncUse.
2221 // for example:
2222 // BB1:
2223 // i64 shift.result = lshr i64 opnd, imm
2224 // trunc.result = trunc shift.result to i16
2225 //
2226 // BB2:
2227 // ----> We will have an implicit truncate here if the architecture does
2228 // not have i16 compare.
2229 // cmp i16 trunc.result, opnd2
2230 //
2231 if (isa<TruncInst>(User) &&
2232 shiftIsLegal
2233 // If the type of the truncate is legal, no truncate will be
2234 // introduced in other basic blocks.
2235 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2236 MadeChange =
2237 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2238
2239 continue;
2240 }
2241 // If we have already inserted a shift into this block, use it.
2242 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2243
2244 if (!InsertedShift) {
2245 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2246 assert(InsertPt != UserBB->end());
2247
2248 if (ShiftI->getOpcode() == Instruction::AShr)
2249 InsertedShift =
2250 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2251 else
2252 InsertedShift =
2253 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2254 InsertedShift->insertBefore(*UserBB, InsertPt);
2255 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2256
2257 MadeChange = true;
2258 }
2259
2260 // Replace a use of the shift with a use of the new shift.
2261 TheUse = InsertedShift;
2262 }
2263
2264 // If we removed all uses, or there are none, nuke the shift.
2265 if (ShiftI->use_empty()) {
2266 salvageDebugInfo(*ShiftI);
2267 ShiftI->eraseFromParent();
2268 MadeChange = true;
2269 }
2270
2271 return MadeChange;
2272}
2273
2274/// If counting leading or trailing zeros is an expensive operation and a zero
2275/// input is defined, add a check for zero to avoid calling the intrinsic.
2276///
2277/// We want to transform:
2278/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2279///
2280/// into:
2281/// entry:
2282/// %cmpz = icmp eq i64 %A, 0
2283/// br i1 %cmpz, label %cond.end, label %cond.false
2284/// cond.false:
2285/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2286/// br label %cond.end
2287/// cond.end:
2288/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2289///
2290/// If the transform is performed, return true and set ModifiedDT to true.
2291static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2292 LoopInfo &LI,
2293 const TargetLowering *TLI,
2294 const DataLayout *DL, ModifyDT &ModifiedDT,
2295 SmallSet<BasicBlock *, 32> &FreshBBs,
2296 bool IsHugeFunc) {
2297 // If a zero input is undefined, it doesn't make sense to despeculate that.
2298 if (match(CountZeros->getOperand(1), m_One()))
2299 return false;
2300
2301 // If it's cheap to speculate, there's nothing to do.
2302 Type *Ty = CountZeros->getType();
2303 auto IntrinsicID = CountZeros->getIntrinsicID();
2304 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2305 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2306 return false;
2307
2308 // Only handle legal scalar cases. Anything else requires too much work.
2309 unsigned SizeInBits = Ty->getScalarSizeInBits();
2310 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
2311 return false;
2312
2313 // Bail if the value is never zero.
2314 Use &Op = CountZeros->getOperandUse(0);
2315 if (isKnownNonZero(Op, *DL))
2316 return false;
2317
2318 // The intrinsic will be sunk behind a compare against zero and branch.
2319 BasicBlock *StartBlock = CountZeros->getParent();
2320 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2321 if (IsHugeFunc)
2322 FreshBBs.insert(CallBlock);
2323
2324 // Create another block after the count zero intrinsic. A PHI will be added
2325 // in this block to select the result of the intrinsic or the bit-width
2326 // constant if the input to the intrinsic is zero.
2327 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2328 // Any debug-info after CountZeros should not be included.
2329 SplitPt.setHeadBit(true);
2330 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2331 if (IsHugeFunc)
2332 FreshBBs.insert(EndBlock);
2333
2334 // Update the LoopInfo. The new blocks are in the same loop as the start
2335 // block.
2336 if (Loop *L = LI.getLoopFor(StartBlock)) {
2337 L->addBasicBlockToLoop(CallBlock, LI);
2338 L->addBasicBlockToLoop(EndBlock, LI);
2339 }
2340
2341 // Set up a builder to create a compare, conditional branch, and PHI.
2342 IRBuilder<> Builder(CountZeros->getContext());
2343 Builder.SetInsertPoint(StartBlock->getTerminator());
2344 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2345
2346 // Replace the unconditional branch that was created by the first split with
2347 // a compare against zero and a conditional branch.
2348 Value *Zero = Constant::getNullValue(Ty);
2349 // Avoid introducing branch on poison. This also replaces the ctz operand.
2350 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2351 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2352 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2353 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2354 StartBlock->getTerminator()->eraseFromParent();
2355
2356 // Create a PHI in the end block to select either the output of the intrinsic
2357 // or the bit width of the operand.
2358 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2359 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2360 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2361 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2362 PN->addIncoming(BitWidth, StartBlock);
2363 PN->addIncoming(CountZeros, CallBlock);
2364
2365 // We are explicitly handling the zero case, so we can set the intrinsic's
2366 // undefined zero argument to 'true'. This will also prevent reprocessing the
2367 // intrinsic; we only despeculate when a zero input is defined.
2368 CountZeros->setArgOperand(1, Builder.getTrue());
2369 ModifiedDT = ModifyDT::ModifyBBDT;
2370 return true;
2371}
2372
2373bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2374 BasicBlock *BB = CI->getParent();
2375
2376 // Lower inline assembly if we can.
2377 // If we found an inline asm expression, and if the target knows how to
2378 // lower it to normal LLVM code, do so now.
2379 if (CI->isInlineAsm()) {
2380 if (TLI->ExpandInlineAsm(CI)) {
2381 // Avoid invalidating the iterator.
2382 CurInstIterator = BB->begin();
2383 // Avoid processing instructions out of order, which could cause
2384 // reuse before a value is defined.
2385 SunkAddrs.clear();
2386 return true;
2387 }
2388 // Sink address computing for memory operands into the block.
2389 if (optimizeInlineAsmInst(CI))
2390 return true;
2391 }
2392
2393 // Align the pointer arguments to this call if the target thinks it's a good
2394 // idea.
2395 unsigned MinSize;
2396 Align PrefAlign;
2397 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2398 for (auto &Arg : CI->args()) {
2399 // We want to align both objects whose address is used directly and
2400 // objects whose address is used in casts and GEPs, though it only makes
2401 // sense for GEPs if the offset is a multiple of the desired alignment and
2402 // if size - offset meets the size threshold.
2403 if (!Arg->getType()->isPointerTy())
2404 continue;
2405 APInt Offset(DL->getIndexSizeInBits(
2406 cast<PointerType>(Arg->getType())->getAddressSpace()),
2407 0);
2408 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2409 uint64_t Offset2 = Offset.getLimitedValue();
2410 if (!isAligned(PrefAlign, Offset2))
2411 continue;
2412 AllocaInst *AI;
2413 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2414 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2415 AI->setAlignment(PrefAlign);
2416 // Global variables can only be aligned if they are defined in this
2417 // object (i.e. they are uniquely initialized in this object), and
2418 // over-aligning global variables that have an explicit section is
2419 // forbidden.
2420 GlobalVariable *GV;
2421 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2422 GV->getPointerAlignment(*DL) < PrefAlign &&
2423 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2424 GV->setAlignment(PrefAlign);
2425 }
2426 }
2427 // If this is a memcpy (or similar) then we may be able to improve the
2428 // alignment.
2429 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2430 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2431 MaybeAlign MIDestAlign = MI->getDestAlign();
2432 if (!MIDestAlign || DestAlign > *MIDestAlign)
2433 MI->setDestAlignment(DestAlign);
2434 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2435 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2436 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2437 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2438 MTI->setSourceAlignment(SrcAlign);
2439 }
2440 }
2441
2442 // If we have a cold call site, try to sink addressing computation into the
2443 // cold block. This interacts with our handling for loads and stores to
2444 // ensure that we can fold all uses of a potential addressing computation
2445 // into their uses. TODO: generalize this to work over profiling data
2446 if (CI->hasFnAttr(Attribute::Cold) && !OptSize &&
2447 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2448 for (auto &Arg : CI->args()) {
2449 if (!Arg->getType()->isPointerTy())
2450 continue;
2451 unsigned AS = Arg->getType()->getPointerAddressSpace();
2452 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2453 return true;
2454 }
2455
2456 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2457 if (II) {
2458 switch (II->getIntrinsicID()) {
2459 default:
2460 break;
2461 case Intrinsic::assume:
2462 llvm_unreachable("llvm.assume should have been removed already");
2463 case Intrinsic::allow_runtime_check:
2464 case Intrinsic::allow_ubsan_check:
2465 case Intrinsic::experimental_widenable_condition: {
2466 // Give up on future widening opportunities so that we can fold away dead
2467 // paths and merge blocks before going into block-local instruction
2468 // selection.
2469 if (II->use_empty()) {
2470 II->eraseFromParent();
2471 return true;
2472 }
2473 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2474 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2475 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2476 });
2477 return true;
2478 }
2479 case Intrinsic::objectsize:
2480 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2481 case Intrinsic::is_constant:
2482 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2483 case Intrinsic::aarch64_stlxr:
2484 case Intrinsic::aarch64_stxr: {
2485 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2486 if (!ExtVal || !ExtVal->hasOneUse() ||
2487 ExtVal->getParent() == CI->getParent())
2488 return false;
2489 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2490 ExtVal->moveBefore(CI);
2491 // Mark this instruction as "inserted by CGP", so that other
2492 // optimizations don't touch it.
2493 InsertedInsts.insert(ExtVal);
2494 return true;
2495 }
2496
2497 case Intrinsic::launder_invariant_group:
2498 case Intrinsic::strip_invariant_group: {
2499 Value *ArgVal = II->getArgOperand(0);
2500 auto it = LargeOffsetGEPMap.find(II);
2501 if (it != LargeOffsetGEPMap.end()) {
2502 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2503 // Make sure not to have to deal with iterator invalidation
2504 // after possibly adding ArgVal to LargeOffsetGEPMap.
2505 auto GEPs = std::move(it->second);
2506 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2507 LargeOffsetGEPMap.erase(II);
2508 }
2509
2510 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2511 II->eraseFromParent();
2512 return true;
2513 }
2514 case Intrinsic::cttz:
2515 case Intrinsic::ctlz:
2516 // If counting zeros is expensive, try to avoid it.
2517 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2518 IsHugeFunc);
2519 case Intrinsic::fshl:
2520 case Intrinsic::fshr:
2521 return optimizeFunnelShift(II);
2522 case Intrinsic::dbg_assign:
2523 case Intrinsic::dbg_value:
2524 return fixupDbgValue(II);
2525 case Intrinsic::masked_gather:
2526 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2527 case Intrinsic::masked_scatter:
2528 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2529 }
2530
2531 SmallVector<Value *, 2> PtrOps;
2532 Type *AccessTy;
2533 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2534 while (!PtrOps.empty()) {
2535 Value *PtrVal = PtrOps.pop_back_val();
2536 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2537 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2538 return true;
2539 }
2540 }
2541
2542 // From here on out we're working with named functions.
2543 if (!CI->getCalledFunction())
2544 return false;
2545
2546 // Lower all default uses of _chk calls. This is very similar
2547 // to what InstCombineCalls does, but here we are only lowering calls
2548 // to fortified library functions (e.g. __memcpy_chk) that have the default
2549 // "don't know" as the objectsize. Anything else should be left alone.
2550 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2551 IRBuilder<> Builder(CI);
2552 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2553 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2554 CI->eraseFromParent();
2555 return true;
2556 }
2557
2558 return false;
2559}
2560
2560
2561static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2562 const CallInst *CI) {
2563 assert(CI && CI->use_empty());
2564
2565 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2566 switch (II->getIntrinsicID()) {
2567 case Intrinsic::memset:
2568 case Intrinsic::memcpy:
2569 case Intrinsic::memmove:
2570 return true;
2571 default:
2572 return false;
2573 }
2574
2575 LibFunc LF;
2576 Function *Callee = CI->getCalledFunction();
2577 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2578 switch (LF) {
2579 case LibFunc_strcpy:
2580 case LibFunc_strncpy:
2581 case LibFunc_strcat:
2582 case LibFunc_strncat:
2583 return true;
2584 default:
2585 return false;
2586 }
2587
2588 return false;
2589}
2590
2591/// Look for opportunities to duplicate return instructions to the predecessor
2592/// to enable tail call optimizations. The case it is currently looking for is
2593/// the following one. Known intrinsics or library functions that may be tail
2594/// called are taken into account as well.
2595/// @code
2596/// bb0:
2597/// %tmp0 = tail call i32 @f0()
2598/// br label %return
2599/// bb1:
2600/// %tmp1 = tail call i32 @f1()
2601/// br label %return
2602/// bb2:
2603/// %tmp2 = tail call i32 @f2()
2604/// br label %return
2605/// return:
2606/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2607/// ret i32 %retval
2608/// @endcode
2609///
2610/// =>
2611///
2612/// @code
2613/// bb0:
2614/// %tmp0 = tail call i32 @f0()
2615/// ret i32 %tmp0
2616/// bb1:
2617/// %tmp1 = tail call i32 @f1()
2618/// ret i32 %tmp1
2619/// bb2:
2620/// %tmp2 = tail call i32 @f2()
2621/// ret i32 %tmp2
2622/// @endcode
2623bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2624 ModifyDT &ModifiedDT) {
2625 if (!BB->getTerminator())
2626 return false;
2627
2628 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2629 if (!RetI)
2630 return false;
2631
2632 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2633
2634 PHINode *PN = nullptr;
2635 ExtractValueInst *EVI = nullptr;
2636 BitCastInst *BCI = nullptr;
2637 Value *V = RetI->getReturnValue();
2638 if (V) {
2639 BCI = dyn_cast<BitCastInst>(V);
2640 if (BCI)
2641 V = BCI->getOperand(0);
2642
2643 EVI = dyn_cast<ExtractValueInst>(V);
2644 if (EVI) {
2645 V = EVI->getOperand(0);
2646 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2647 return false;
2648 }
2649
2650 PN = dyn_cast<PHINode>(V);
2651 }
2652
2653 if (PN && PN->getParent() != BB)
2654 return false;
2655
2656 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2657 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2658 if (BC && BC->hasOneUse())
2659 Inst = BC->user_back();
2660
2661 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2662 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2663 return false;
2664 };
2665
2666 // Make sure there are no instructions between the first instruction
2667 // and return.
2668 const Instruction *BI = BB->getFirstNonPHI();
2669 // Skip over debug and the bitcast.
2670 while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
2671 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
2672 BI = BI->getNextNode();
2673 if (BI != RetI)
2674 return false;
2675
2676 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2677 /// call.
2678 const Function *F = BB->getParent();
2679 SmallVector<BasicBlock *, 4> TailCallBBs;
2680 if (PN) {
2681 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2682 // Look through bitcasts.
2683 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2684 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2685 BasicBlock *PredBB = PN->getIncomingBlock(I);
2686 // Make sure the phi value is indeed produced by the tail call.
2687 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2688 TLI->mayBeEmittedAsTailCall(CI) &&
2689 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2690 TailCallBBs.push_back(PredBB);
2691 } else {
2692 // Consider the cases in which the phi value is indirectly produced by
2693 // the tail call, for example when encountering memset(), memmove(),
2694 // strcpy(), whose return value may have been optimized out. In such
2695 // cases, the value needs to be the first function argument.
2696 //
2697 // bb0:
2698 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2699 // br label %return
2700 // return:
2701 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2702 if (PredBB && PredBB->getSingleSuccessor() == BB)
2703 CI = dyn_cast_or_null<CallInst>(
2704 PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2705
2706 if (CI && CI->use_empty() &&
2707 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2708 IncomingVal == CI->getArgOperand(0) &&
2709 TLI->mayBeEmittedAsTailCall(CI) &&
2710 attributesPermitTailCall(F, CI, RetI, *TLI))
2711 TailCallBBs.push_back(PredBB);
2712 }
2713 }
2714 } else {
2715 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
2716 for (BasicBlock *Pred : predecessors(BB)) {
2717 if (!VisitedBBs.insert(Pred).second)
2718 continue;
2719 if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
2720 CallInst *CI = dyn_cast<CallInst>(I);
2721 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2722 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2723 // Either we return void or the return value must be the first
2724 // argument of a known intrinsic or library function.
2725 if (!V || isa<UndefValue>(V) ||
2726 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2727 V == CI->getArgOperand(0))) {
2728 TailCallBBs.push_back(Pred);
2729 }
2730 }
2731 }
2732 }
2733 }
2734
2735 bool Changed = false;
2736 for (auto const &TailCallBB : TailCallBBs) {
2737 // Make sure the call instruction is followed by an unconditional branch to
2738 // the return block.
2739 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2740 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2741 continue;
2742
2743 // Duplicate the return into TailCallBB.
2744 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2745 assert(!VerifyBFIUpdates ||
2746 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
2747 BFI->setBlockFreq(BB,
2748 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
2749 ModifiedDT = ModifyDT::ModifyBBDT;
2750 Changed = true;
2751 ++NumRetsDup;
2752 }
2753
2754 // If we eliminated all predecessors of the block, delete the block now.
2755 if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
2756 BB->eraseFromParent();
2757
2758 return Changed;
2759}
2760
2761//===----------------------------------------------------------------------===//
2762// Memory Optimization
2763//===----------------------------------------------------------------------===//
2764
2765namespace {
2766
2767/// This is an extended version of TargetLowering::AddrMode
2768/// which holds actual Value*'s for register values.
2769struct ExtAddrMode : public TargetLowering::AddrMode {
2770 Value *BaseReg = nullptr;
2771 Value *ScaledReg = nullptr;
2772 Value *OriginalValue = nullptr;
2773 bool InBounds = true;
2774
2775 enum FieldName {
2776 NoField = 0x00,
2777 BaseRegField = 0x01,
2778 BaseGVField = 0x02,
2779 BaseOffsField = 0x04,
2780 ScaledRegField = 0x08,
2781 ScaleField = 0x10,
2782 MultipleFields = 0xff
2783 };
2784
2785 ExtAddrMode() = default;
2786
2787 void print(raw_ostream &OS) const;
2788 void dump() const;
2789
2790 FieldName compare(const ExtAddrMode &other) {
2791 // First check that the types are the same on each field, as differing types
2792 // are something we can't cope with later on.
2793 if (BaseReg && other.BaseReg &&
2794 BaseReg->getType() != other.BaseReg->getType())
2795 return MultipleFields;
2796 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
2797 return MultipleFields;
2798 if (ScaledReg && other.ScaledReg &&
2799 ScaledReg->getType() != other.ScaledReg->getType())
2800 return MultipleFields;
2801
2802 // Conservatively reject 'inbounds' mismatches.
2803 if (InBounds != other.InBounds)
2804 return MultipleFields;
2805
2806 // Check each field to see if it differs.
2807 unsigned Result = NoField;
2808 if (BaseReg != other.BaseReg)
2809 Result |= BaseRegField;
2810 if (BaseGV != other.BaseGV)
2811 Result |= BaseGVField;
2812 if (BaseOffs != other.BaseOffs)
2813 Result |= BaseOffsField;
2814 if (ScaledReg != other.ScaledReg)
2815 Result |= ScaledRegField;
2816 // Don't count 0 as being a different scale, because that actually means
2817 // unscaled (which will already be counted by having no ScaledReg).
2818 if (Scale && other.Scale && Scale != other.Scale)
2819 Result |= ScaleField;
2820
2821 if (llvm::popcount(Result) > 1)
2822 return MultipleFields;
2823 else
2824 return static_cast<FieldName>(Result);
2825 }
2826
2827 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2828 // with no offset.
2829 bool isTrivial() {
2830 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2831 // trivial if at most one of these terms is nonzero, except that BaseGV and
2832 // BaseReg both being zero actually means a null pointer value, which we
2833 // consider to be 'non-zero' here.
2834 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2835 }
2836
2837 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2838 switch (Field) {
2839 default:
2840 return nullptr;
2841 case BaseRegField:
2842 return BaseReg;
2843 case BaseGVField:
2844 return BaseGV;
2845 case ScaledRegField:
2846 return ScaledReg;
2847 case BaseOffsField:
2848 return ConstantInt::get(IntPtrTy, BaseOffs);
2849 }
2850 }
2851
2852 void SetCombinedField(FieldName Field, Value *V,
2853 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2854 switch (Field) {
2855 default:
2856 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2857 break;
2858 case ExtAddrMode::BaseRegField:
2859 BaseReg = V;
2860 break;
2861 case ExtAddrMode::BaseGVField:
2862 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2863 // in the BaseReg field.
2864 assert(BaseReg == nullptr);
2865 BaseReg = V;
2866 BaseGV = nullptr;
2867 break;
2868 case ExtAddrMode::ScaledRegField:
2869 ScaledReg = V;
2870 // If we have a mix of scaled and unscaled addrmodes then we want scale
2871 // to be the scale and not zero.
2872 if (!Scale)
2873 for (const ExtAddrMode &AM : AddrModes)
2874 if (AM.Scale) {
2875 Scale = AM.Scale;
2876 break;
2877 }
2878 break;
2879 case ExtAddrMode::BaseOffsField:
2880 // The offset is no longer a constant, so it goes in ScaledReg with a
2881 // scale of 1.
2882 assert(ScaledReg == nullptr);
2883 ScaledReg = V;
2884 Scale = 1;
2885 BaseOffs = 0;
2886 break;
2887 }
2888 }
2889};
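// An illustrative decomposition (values are hypothetical): for an access whose
// address is computed as "getelementptr inbounds i32, ptr @g, i64 %i", the
// matcher could end up with BaseGV = @g, BaseOffs = 0, ScaledReg = %i and
// Scale = 4, which print() would render as [inbounds GV:@g + 4*%i].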
2890
2891#ifndef NDEBUG
2892static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2893 AM.print(OS);
2894 return OS;
2895}
2896#endif
2897
2898#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2899void ExtAddrMode::print(raw_ostream &OS) const {
2900 bool NeedPlus = false;
2901 OS << "[";
2902 if (InBounds)
2903 OS << "inbounds ";
2904 if (BaseGV) {
2905 OS << "GV:";
2906 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2907 NeedPlus = true;
2908 }
2909
2910 if (BaseOffs) {
2911 OS << (NeedPlus ? " + " : "") << BaseOffs;
2912 NeedPlus = true;
2913 }
2914
2915 if (BaseReg) {
2916 OS << (NeedPlus ? " + " : "") << "Base:";
2917 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2918 NeedPlus = true;
2919 }
2920 if (Scale) {
2921 OS << (NeedPlus ? " + " : "") << Scale << "*";
2922 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2923 }
2924
2925 OS << ']';
2926}
2927
2928LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2929 print(dbgs());
2930 dbgs() << '\n';
2931}
2932#endif
2933
2934} // end anonymous namespace
2935
2936namespace {
2937
2938/// This class provides transaction-based operations on the IR.
2939/// Every change made through this class is recorded in the internal state and
2940/// can be undone (rollback) until commit is called.
2941/// CGP does not check if instructions could be speculatively executed when
2942/// moved. Preserving the original location would pessimize the debugging
2943/// experience, as well as negatively impact the quality of sample PGO.
2944class TypePromotionTransaction {
2945 /// This represents the common interface of the individual transaction.
2946 /// Each class implements the logic for doing one specific modification on
2947 /// the IR via the TypePromotionTransaction.
2948 class TypePromotionAction {
2949 protected:
2950 /// The Instruction modified.
2951 Instruction *Inst;
2952
2953 public:
2954 /// Constructor of the action.
2955 /// The constructor performs the related action on the IR.
2956 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2957
2958 virtual ~TypePromotionAction() = default;
2959
2960 /// Undo the modification done by this action.
2961 /// When this method is called, the IR must be in the same state as it was
2962 /// before this action was applied.
2963 /// \pre Undoing the action works if and only if the IR is in the exact same
2964 /// state as it was directly after this action was applied.
2965 virtual void undo() = 0;
2966
2967 /// Commit every change made by this action.
2968 /// When the action's results on the IR are to be kept, it is important
2969 /// to call this function; otherwise hidden information may be kept forever.
2970 virtual void commit() {
2971 // Nothing to be done, this action is not doing anything.
2972 }
2973 };
2974
2975 /// Utility to remember the position of an instruction.
2976 class InsertionHandler {
2977 /// Position of an instruction.
2978 /// Either an instruction:
2979 /// - Is the first in a basic block: BB is used.
2980 /// - Has a previous instruction: PrevInst is used.
2981 union {
2982 Instruction *PrevInst;
2983 BasicBlock *BB;
2984 } Point;
2985 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
2986
2987 /// Remember whether or not the instruction had a previous instruction.
2988 bool HasPrevInstruction;
2989
2990 public:
2991 /// Record the position of \p Inst.
2992 InsertionHandler(Instruction *Inst) {
2993 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
2994 BasicBlock *BB = Inst->getParent();
2995
2996 // Record where we would have to re-insert the instruction in the sequence
2997 // of DbgRecords, if we ended up reinserting.
2998 if (BB->IsNewDbgInfoFormat)
2999 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3000
3001 if (HasPrevInstruction) {
3002 Point.PrevInst = &*std::prev(Inst->getIterator());
3003 } else {
3004 Point.BB = BB;
3005 }
3006 }
3007
3008 /// Insert \p Inst at the recorded position.
3009 void insert(Instruction *Inst) {
3010 if (HasPrevInstruction) {
3011 if (Inst->getParent())
3012 Inst->removeFromParent();
3013 Inst->insertAfter(&*Point.PrevInst);
3014 } else {
3015 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3016 if (Inst->getParent())
3017 Inst->moveBefore(*Point.BB, Position);
3018 else
3019 Inst->insertBefore(*Point.BB, Position);
3020 }
3021
3022 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3023 }
3024 };
3025
3026 /// Move an instruction before another.
3027 class InstructionMoveBefore : public TypePromotionAction {
3028 /// Original position of the instruction.
3029 InsertionHandler Position;
3030
3031 public:
3032 /// Move \p Inst before \p Before.
3033 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
3034 : TypePromotionAction(Inst), Position(Inst) {
3035 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3036 << "\n");
3037 Inst->moveBefore(Before);
3038 }
3039
3040 /// Move the instruction back to its original position.
3041 void undo() override {
3042 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3043 Position.insert(Inst);
3044 }
3045 };
3046
3047 /// Set the operand of an instruction with a new value.
3048 class OperandSetter : public TypePromotionAction {
3049 /// Original operand of the instruction.
3050 Value *Origin;
3051
3052 /// Index of the modified instruction.
3053 unsigned Idx;
3054
3055 public:
3056 /// Set \p Idx operand of \p Inst with \p NewVal.
3057 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3058 : TypePromotionAction(Inst), Idx(Idx) {
3059 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3060 << "for:" << *Inst << "\n"
3061 << "with:" << *NewVal << "\n");
3062 Origin = Inst->getOperand(Idx);
3063 Inst->setOperand(Idx, NewVal);
3064 }
3065
3066 /// Restore the original value of the instruction.
3067 void undo() override {
3068 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3069 << "for: " << *Inst << "\n"
3070 << "with: " << *Origin << "\n");
3071 Inst->setOperand(Idx, Origin);
3072 }
3073 };
3074
3075 /// Hide the operands of an instruction.
3076 /// Do as if this instruction was not using any of its operands.
3077 class OperandsHider : public TypePromotionAction {
3078 /// The list of original operands.
3079 SmallVector<Value *, 4> OriginalValues;
3080
3081 public:
3082 /// Remove \p Inst from the uses of the operands of \p Inst.
3083 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3084 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3085 unsigned NumOpnds = Inst->getNumOperands();
3086 OriginalValues.reserve(NumOpnds);
3087 for (unsigned It = 0; It < NumOpnds; ++It) {
3088 // Save the current operand.
3089 Value *Val = Inst->getOperand(It);
3090 OriginalValues.push_back(Val);
3091 // Set a dummy one.
3092 // We could use OperandSetter here, but that would imply an overhead
3093 // that we are not willing to pay.
3094 Inst->setOperand(It, UndefValue::get(Val->getType()));
3095 }
3096 }
3097
3098 /// Restore the original list of uses.
3099 void undo() override {
3100 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3101 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3102 Inst->setOperand(It, OriginalValues[It]);
3103 }
3104 };
3105
3106 /// Build a truncate instruction.
3107 class TruncBuilder : public TypePromotionAction {
3108 Value *Val;
3109
3110 public:
3111 /// Build a truncate instruction of \p Opnd producing a \p Ty
3112 /// result.
3113 /// trunc Opnd to Ty.
3114 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3115 IRBuilder<> Builder(Opnd);
3116 Builder.SetCurrentDebugLocation(DebugLoc());
3117 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3118 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3119 }
3120
3121 /// Get the built value.
3122 Value *getBuiltValue() { return Val; }
3123
3124 /// Remove the built instruction.
3125 void undo() override {
3126 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3127 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3128 IVal->eraseFromParent();
3129 }
3130 };
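// A minimal usage sketch for the builder actions above (illustrative only;
// in practice the transaction owns the actions): constructing an action
// applies its change immediately, and undo() rolls it back.
//
//   TruncBuilder TB(SomeInst, NewTy);      // IR now contains the new trunc
//   Value *Promoted = TB.getBuiltValue();
//   // ... if promotion turns out not to be profitable ...
//   TB.undo();                             // the trunc is erased again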
3131
3132 /// Build a sign extension instruction.
3133 class SExtBuilder : public TypePromotionAction {
3134 Value *Val;
3135
3136 public:
3137 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3138 /// result.
3139 /// sext Opnd to Ty.
3140 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3141 : TypePromotionAction(InsertPt) {
3142 IRBuilder<> Builder(InsertPt);
3143 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3144 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3145 }
3146
3147 /// Get the built value.
3148 Value *getBuiltValue() { return Val; }
3149
3150 /// Remove the built instruction.
3151 void undo() override {
3152 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3153 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3154 IVal->eraseFromParent();
3155 }
3156 };
3157
3158 /// Build a zero extension instruction.
3159 class ZExtBuilder : public TypePromotionAction {
3160 Value *Val;
3161
3162 public:
3163 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3164 /// result.
3165 /// zext Opnd to Ty.
3166 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3167 : TypePromotionAction(InsertPt) {
3168 IRBuilder<> Builder(InsertPt);
3169 Builder.SetCurrentDebugLocation(DebugLoc());
3170 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3171 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3172 }
3173
3174 /// Get the built value.
3175 Value *getBuiltValue() { return Val; }
3176
3177 /// Remove the built instruction.
3178 void undo() override {
3179 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3180 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3181 IVal->eraseFromParent();
3182 }
3183 };
3184
3185 /// Mutate an instruction to another type.
3186 class TypeMutator : public TypePromotionAction {
3187 /// Record the original type.
3188 Type *OrigTy;
3189
3190 public:
3191 /// Mutate the type of \p Inst into \p NewTy.
3192 TypeMutator(Instruction *Inst, Type *NewTy)
3193 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3194 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3195 << "\n");
3196 Inst->mutateType(NewTy);
3197 }
3198
3199 /// Mutate the instruction back to its original type.
3200 void undo() override {
3201 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3202 << "\n");
3203 Inst->mutateType(OrigTy);
3204 }
3205 };
3206
3207 /// Replace the uses of an instruction by another instruction.
3208 class UsesReplacer : public TypePromotionAction {
3209 /// Helper structure to keep track of the replaced uses.
3210 struct InstructionAndIdx {
3211 /// The instruction that uses the original instruction.
3212 Instruction *Inst;
3213
3214 /// The operand index at which the original instruction is used.
3215 unsigned Idx;
3216
3217 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3218 : Inst(Inst), Idx(Idx) {}
3219 };
3220
3221 /// Keep track of the original uses (pair Instruction, Index).
3222 SmallVector<InstructionAndIdx, 4> OriginalUses;
3223 /// Keep track of the debug users.
3224 SmallVector<DbgValueInst *, 1> DbgValues;
3225 /// And non-instruction debug-users too.
3226 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3227
3228 /// Keep track of the new value so that we can undo it by replacing
3229 /// instances of the new value with the original value.
3230 Value *New;
3231
3233
3234 public:
3235 /// Replace all the uses of \p Inst with \p New.
3236 UsesReplacer(Instruction *Inst, Value *New)
3237 : TypePromotionAction(Inst), New(New) {
3238 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3239 << "\n");
3240 // Record the original uses.
3241 for (Use &U : Inst->uses()) {
3242 Instruction *UserI = cast<Instruction>(U.getUser());
3243 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3244 }
3245 // Record the debug uses separately. They are not in the instruction's
3246 // use list, but they are replaced by RAUW.
3247 findDbgValues(DbgValues, Inst, &DbgVariableRecords);
3248
3249 // Now, we can replace the uses.
3250 Inst->replaceAllUsesWith(New);
3251 }
3252
3253 /// Reassign the original uses of Inst to Inst.
3254 void undo() override {
3255 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3256 for (InstructionAndIdx &Use : OriginalUses)
3257 Use.Inst->setOperand(Use.Idx, Inst);
3258 // RAUW has replaced all original uses with references to the new value,
3259 // including the debug uses. Since we are undoing the replacements,
3260 // the original debug uses must also be reinstated to maintain the
3261 // correctness and utility of debug value instructions.
3262 for (auto *DVI : DbgValues)
3263 DVI->replaceVariableLocationOp(New, Inst);
3264 // Similar story with DbgVariableRecords, the non-instruction
3265 // representation of dbg.values.
3266 for (DbgVariableRecord *DVR : DbgVariableRecords)
3267 DVR->replaceVariableLocationOp(New, Inst);
3268 }
3269 };
3270
3271 /// Remove an instruction from the IR.
3272 class InstructionRemover : public TypePromotionAction {
3273 /// Original position of the instruction.
3274 InsertionHandler Inserter;
3275
3276 /// Helper structure to hide all the links to the instruction. In other
3277 /// words, this helps to pretend the instruction was removed.
3278 OperandsHider Hider;
3279
3280 /// Keep track of the uses replaced, if any.
3281 UsesReplacer *Replacer = nullptr;
3282
3283 /// Keep track of instructions removed.
3284 SetOfInstrs &RemovedInsts;
3285
3286 public:
3287 /// Remove all references to \p Inst and optionally replace all its
3288 /// uses with \p New.
3289 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3290 /// \pre If !Inst->use_empty(), then New != nullptr
3291 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3292 Value *New = nullptr)
3293 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3294 RemovedInsts(RemovedInsts) {
3295 if (New)
3296 Replacer = new UsesReplacer(Inst, New);
3297 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3298 RemovedInsts.insert(Inst);
3299 /// The instructions removed here will be freed after completing
3300 /// optimizeBlock() for all blocks as we need to keep track of the
3301 /// removed instructions during promotion.
3302 Inst->removeFromParent();
3303 }
3304
3305 ~InstructionRemover() override { delete Replacer; }
3306
3307 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3308 InstructionRemover(const InstructionRemover &other) = delete;
3309
3310 /// Resurrect the instruction and reassign it to the proper uses if
3311 /// a new value was provided when building this action.
3312 void undo() override {
3313 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3314 Inserter.insert(Inst);
3315 if (Replacer)
3316 Replacer->undo();
3317 Hider.undo();
3318 RemovedInsts.erase(Inst);
3319 }
3320 };
3321
3322public:
3323 /// Restoration point.
3324 /// The restoration point is a pointer to an action instead of an iterator
3325 /// because the iterator may be invalidated but not the pointer.
3326 using ConstRestorationPt = const TypePromotionAction *;
3327
3328 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3329 : RemovedInsts(RemovedInsts) {}
3330
3331 /// Commit every change made in this transaction. Return true if any change
3332 /// happened.
3333 bool commit();
3334
3335 /// Undo all the changes made after the given point.
3336 void rollback(ConstRestorationPt Point);
3337
3338 /// Get the current restoration point.
3339 ConstRestorationPt getRestorationPoint() const;
3340
3341 /// \name API for IR modification with state keeping to support rollback.
3342 /// @{
3343 /// Same as Instruction::setOperand.
3344 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3345
3346 /// Same as Instruction::eraseFromParent.
3347 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3348
3349 /// Same as Value::replaceAllUsesWith.
3350 void replaceAllUsesWith(Instruction *Inst, Value *New);
3351
3352 /// Same as Value::mutateType.
3353 void mutateType(Instruction *Inst, Type *NewTy);
3354
3355 /// Same as IRBuilder::createTrunc.
3356 Value *createTrunc(Instruction *Opnd, Type *Ty);
3357
3358 /// Same as IRBuilder::createSExt.
3359 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3360
3361 /// Same as IRBuilder::createZExt.
3362 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3363
3364private:
3365 /// The ordered list of actions made so far.
3366 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3367
3368 using CommitPt =
3369 SmallVector<std::unique_ptr<TypePromotionAction>, 16>::iterator;
3370
3371 SetOfInstrs &RemovedInsts;
3372};
3373
3374} // end anonymous namespace
3375
3376void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3377 Value *NewVal) {
3378 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3379 Inst, Idx, NewVal));
3380}
3381
3382void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3383 Value *NewVal) {
3384 Actions.push_back(
3385 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3386 Inst, RemovedInsts, NewVal));
3387}
3388
3389void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3390 Value *New) {
3391 Actions.push_back(
3392 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3393}
3394
3395void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3396 Actions.push_back(
3397 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3398}
3399
3400Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3401 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3402 Value *Val = Ptr->getBuiltValue();
3403 Actions.push_back(std::move(Ptr));
3404 return Val;
3405}
3406
3407Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3408 Type *Ty) {
3409 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3410 Value *Val = Ptr->getBuiltValue();
3411 Actions.push_back(std::move(Ptr));
3412 return Val;
3413}
3414
3415Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3416 Type *Ty) {
3417 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3418 Value *Val = Ptr->getBuiltValue();
3419 Actions.push_back(std::move(Ptr));
3420 return Val;
3421}
3422
3423TypePromotionTransaction::ConstRestorationPt
3424TypePromotionTransaction::getRestorationPoint() const {
3425 return !Actions.empty() ? Actions.back().get() : nullptr;
3426}
3427
3428bool TypePromotionTransaction::commit() {
3429 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3430 Action->commit();
3431 bool Modified = !Actions.empty();
3432 Actions.clear();
3433 return Modified;
3434}
3435
3436void TypePromotionTransaction::rollback(
3437 TypePromotionTransaction::ConstRestorationPt Point) {
3438 while (!Actions.empty() && Point != Actions.back().get()) {
3439 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3440 Curr->undo();
3441 }
3442}
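// A sketch of how the transaction API above is typically driven: take a
// restoration point, apply speculative rewrites, then either roll back or
// commit. TPT, Inst, WideTy and Profitable are illustrative placeholders.
//
//   TypePromotionTransaction::ConstRestorationPt RP = TPT.getRestorationPoint();
//   Value *Promoted = TPT.createSExt(Inst, Inst->getOperand(0), WideTy);
//   if (!Profitable)
//     TPT.rollback(RP); // undoes every action recorded after RP, newest first
//   else
//     TPT.commit();     // makes all recorded actions permanent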
3443
3444namespace {
3445
3446/// A helper class for matching addressing modes.
3447///
3448/// This encapsulates the logic for matching the target-legal addressing modes.
3449class AddressingModeMatcher {
3450 SmallVectorImpl<Instruction *> &AddrModeInsts;
3451 const TargetLowering &TLI;
3452 const TargetRegisterInfo &TRI;
3453 const DataLayout &DL;
3454 const LoopInfo &LI;
3455 const std::function<const DominatorTree &()> getDTFn;
3456
3457 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3458 /// the memory instruction that we're computing this address for.
3459 Type *AccessTy;
3460 unsigned AddrSpace;
3461 Instruction *MemoryInst;
3462
3463 /// This is the addressing mode that we're building up. This is
3464 /// part of the return value of this addressing mode matching stuff.
3465 ExtAddrMode &AddrMode;
3466
3467 /// The instructions inserted by other CodeGenPrepare optimizations.
3468 const SetOfInstrs &InsertedInsts;
3469
3470 /// A map from the instructions to their type before promotion.
3471 InstrToOrigTy &PromotedInsts;
3472
3473 /// The ongoing transaction where every action should be registered.
3474 TypePromotionTransaction &TPT;
3475
3476 // A GEP whose offset is too large to be folded into the addressing mode.
3477 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3478
3479 /// This is set to true when we should not do profitability checks.
3480 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3481 bool IgnoreProfitability;
3482
3483 /// True if we are optimizing for size.
3484 bool OptSize = false;
3485
3486 ProfileSummaryInfo *PSI;
3487 BlockFrequencyInfo *BFI;
3488
3489 AddressingModeMatcher(
3490 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3491 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3492 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3493 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3494 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3495 TypePromotionTransaction &TPT,
3496 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3497 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3498 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3499 DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
3500 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3501 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3502 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3503 IgnoreProfitability = false;
3504 }
3505
3506public:
3507 /// Find the maximal addressing mode that a load/store of V can fold,
3508 /// given an access type of AccessTy. This returns a list of involved
3509 /// instructions in AddrModeInsts.
3510 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3511 /// optimizations.
3512 /// \p PromotedInsts maps the instructions to their type before promotion.
3513 /// \p TPT The ongoing transaction where every action should be registered.
3514 static ExtAddrMode
3515 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3516 SmallVectorImpl<Instruction *> &AddrModeInsts,
3517 const TargetLowering &TLI, const LoopInfo &LI,
3518 const std::function<const DominatorTree &()> getDTFn,
3519 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3520 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3521 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3522 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3523 ExtAddrMode Result;
3524
3525 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3526 AccessTy, AS, MemoryInst, Result,
3527 InsertedInsts, PromotedInsts, TPT,
3528 LargeOffsetGEP, OptSize, PSI, BFI)
3529 .matchAddr(V, 0);
3530 (void)Success;
3531 assert(Success && "Couldn't select *anything*?");
3532 return Result;
3533 }
3534
3535private:
3536 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3537 bool matchAddr(Value *Addr, unsigned Depth);
3538 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3539 bool *MovedAway = nullptr);
3540 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3541 ExtAddrMode &AMBefore,
3542 ExtAddrMode &AMAfter);
3543 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3544 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3545 Value *PromotedOperand) const;
3546};
3547
3548class PhiNodeSet;
3549
3550/// An iterator for PhiNodeSet.
3551class PhiNodeSetIterator {
3552 PhiNodeSet *const Set;
3553 size_t CurrentIndex = 0;
3554
3555public:
3556 /// The constructor. Start should point to either a valid element, or be equal
3557 /// to the size of the underlying SmallVector of the PhiNodeSet.
3558 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3559 PHINode *operator*() const;
3560 PhiNodeSetIterator &operator++();
3561 bool operator==(const PhiNodeSetIterator &RHS) const;
3562 bool operator!=(const PhiNodeSetIterator &RHS) const;
3563};
3564
3565/// Keeps a set of PHINodes.
3566///
3567/// This is a minimal set implementation for a specific use case:
3568/// It is very fast when there are very few elements, but also provides good
3569/// performance when there are many. It is similar to SmallPtrSet, but also
3570/// provides iteration by insertion order, which is deterministic and stable
3571 /// across runs. It is also similar to SmallSetVector, but it supports removing
3572 /// elements in O(1) time. This is achieved by not actually removing the element
3573 /// from the underlying vector, so it comes at the cost of using more memory, but
3574 /// that is fine, since PhiNodeSets are used as short-lived objects.
3575class PhiNodeSet {
3576 friend class PhiNodeSetIterator;
3577
3578 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3579 using iterator = PhiNodeSetIterator;
3580
3581 /// Keeps the elements in the order of their insertion in the underlying
3582 /// vector. To achieve constant time removal, it never deletes any element.
3583 SmallVector<PHINode *, 32> NodeList;
3584
3585 /// Keeps the elements in the underlying set implementation. This (and not the
3586 /// NodeList defined above) is the source of truth on whether an element
3587 /// is actually in the collection.
3588 MapType NodeMap;
3589
3590 /// Points to the first valid (not deleted) element when the set is not empty
3591 /// and the value is not zero. Equals the size of the underlying vector
3592 /// when the set is empty. When the value is 0, as in the beginning, the
3593 /// first element may or may not be valid.
3594 size_t FirstValidElement = 0;
3595
3596public:
3597 /// Inserts a new element to the collection.
3598 /// \returns true if the element is actually added, i.e. was not in the
3599 /// collection before the operation.
3600 bool insert(PHINode *Ptr) {
3601 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3602 NodeList.push_back(Ptr);
3603 return true;
3604 }
3605 return false;
3606 }
3607
3608 /// Removes the element from the collection.
3609 /// \returns whether the element is actually removed, i.e. was in the
3610 /// collection before the operation.
3611 bool erase(PHINode *Ptr) {
3612 if (NodeMap.erase(Ptr)) {
3613 SkipRemovedElements(FirstValidElement);
3614 return true;
3615 }
3616 return false;
3617 }
3618
3619 /// Removes all elements and clears the collection.
3620 void clear() {
3621 NodeMap.clear();
3622 NodeList.clear();
3623 FirstValidElement = 0;
3624 }
3625
3626 /// \returns an iterator that will iterate the elements in the order of
3627 /// insertion.
3628 iterator begin() {
3629 if (FirstValidElement == 0)
3630 SkipRemovedElements(FirstValidElement);
3631 return PhiNodeSetIterator(this, FirstValidElement);
3632 }
3633
3634 /// \returns an iterator that points to the end of the collection.
3635 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3636
3637 /// Returns the number of elements in the collection.
3638 size_t size() const { return NodeMap.size(); }
3639
3640 /// \returns 1 if the given element is in the collection, and 0 otherwise.
3641 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3642
3643private:
3644 /// Updates the CurrentIndex so that it will point to a valid element.
3645 ///
3646 /// If the element of NodeList at CurrentIndex is valid, it does not
3647 /// change it. If there are no more valid elements, it updates CurrentIndex
3648 /// to point to the end of the NodeList.
3649 void SkipRemovedElements(size_t &CurrentIndex) {
3650 while (CurrentIndex < NodeList.size()) {
3651 auto it = NodeMap.find(NodeList[CurrentIndex]);
3652 // If the element has been deleted and added again later, NodeMap will
3653 // point to a different index, so CurrentIndex will still be invalid.
3654 if (it != NodeMap.end() && it->second == CurrentIndex)
3655 break;
3656 ++CurrentIndex;
3657 }
3658 }
3659};
3660
3661PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3662 : Set(Set), CurrentIndex(Start) {}
3663
3664PHINode *PhiNodeSetIterator::operator*() const {
3665 assert(CurrentIndex < Set->NodeList.size() &&
3666 "PhiNodeSet access out of range");
3667 return Set->NodeList[CurrentIndex];
3668}
3669
3670PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
3671 assert(CurrentIndex < Set->NodeList.size() &&
3672 "PhiNodeSet access out of range");
3673 ++CurrentIndex;
3674 Set->SkipRemovedElements(CurrentIndex);
3675 return *this;
3676}
3677
3678bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3679 return CurrentIndex == RHS.CurrentIndex;
3680}
3681
3682bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3683 return !((*this) == RHS);
3684}
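// Illustrative use of PhiNodeSet (P1 and P2 are placeholder PHINode pointers):
// erase() only drops the entry from NodeMap; NodeList keeps the stale pointer
// and iteration skips it via SkipRemovedElements(), so removal is O(1) while
// iteration still follows insertion order.
//
//   PhiNodeSet Phis;
//   Phis.insert(P1);
//   Phis.insert(P2);
//   Phis.erase(P1);              // constant time; only NodeMap is updated
//   for (PHINode *P : Phis) ...  // visits P2 only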
3685
3686/// Keep track of simplification of Phi nodes.
3687/// Accept the set of all phi nodes and erase phi node from this set
3688/// if it is simplified.
3689 class SimplificationTracker {
3690 DenseMap<Value *, Value *> Storage;
3691 const SimplifyQuery &SQ;
3692 // Tracks newly created Phi nodes. The elements are iterated by insertion
3693 // order.
3694 PhiNodeSet AllPhiNodes;
3695 // Tracks newly created Select nodes.
3696 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3697
3698public:
3699 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
3700
3701 Value *Get(Value *V) {
3702 do {
3703 auto SV = Storage.find(V);
3704 if (SV == Storage.end())
3705 return V;
3706 V = SV->second;
3707 } while (true);
3708 }
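// Get() follows chains of replacements recorded via Put(): for example, after
// Put(A, B) and Put(B, C), Get(A) returns C (A, B and C being arbitrary values).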
3709
3710 Value *Simplify(Value *Val) {
3711 SmallVector<Value *, 32> WorkList;
3712 SmallPtrSet<Value *, 32> Visited;
3713 WorkList.push_back(Val);
3714 while (!WorkList.empty()) {
3715 auto *P = WorkList.pop_back_val();
3716 if (!Visited.insert(P).second)
3717 continue;
3718 if (auto *PI = dyn_cast<Instruction>(P))
3719 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
3720 for (auto *U : PI->users())
3721 WorkList.push_back(cast<Value>(U));
3722 Put(PI, V);
3723 PI->replaceAllUsesWith(V);
3724 if (auto *PHI = dyn_cast<PHINode>(PI))
3725 AllPhiNodes.erase(PHI);
3726 if (auto *Select = dyn_cast<SelectInst>(PI))
3727 AllSelectNodes.erase(Select);
3728 PI->eraseFromParent();
3729 }
3730 }
3731 return Get(Val);
3732 }
3733
3734 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
3735
3736 void ReplacePhi(PHINode *From, PHINode *To) {
3737 Value *OldReplacement = Get(From);
3738 while (OldReplacement != From) {
3739 From = To;
3740 To = dyn_cast<PHINode>(OldReplacement);
3741 OldReplacement = Get(From);
3742 }
3743 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
3744 Put(From, To);
3745 From->replaceAllUsesWith(To);
3746 AllPhiNodes.erase(From);
3747 From->eraseFromParent();
3748 }
3749
3750 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
3751
3752 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3753
3754 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3755
3756 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3757
3758 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3759
3760 void destroyNewNodes(Type *CommonType) {
3761 // For safe erasing, replace the uses with dummy value first.
3762 auto *Dummy = PoisonValue::get(CommonType);
3763 for (auto *I : AllPhiNodes) {
3764 I->replaceAllUsesWith(Dummy);
3765 I->eraseFromParent();
3766 }
3767 AllPhiNodes.clear();
3768 for (auto *I : AllSelectNodes) {
3769 I->replaceAllUsesWith(Dummy);
3770 I->eraseFromParent();
3771 }
3772 AllSelectNodes.clear();
3773 }
3774};
3775
3776/// A helper class for combining addressing modes.
3777class AddressingModeCombiner {
3778 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3779 typedef std::pair<PHINode *, PHINode *> PHIPair;
3780
3781private:
3782 /// The addressing modes we've collected.
3783 SmallVector<ExtAddrMode, 16> AddrModes;
3784
3785 /// The field in which the AddrModes differ, when we have more than one.
3786 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3787
3788 /// Are the AddrModes that we have all just equal to their original values?
3789 bool AllAddrModesTrivial = true;
3790
3791 /// Common Type for all different fields in addressing modes.
3792 Type *CommonType = nullptr;
3793
3794 /// SimplifyQuery for simplifyInstruction utility.
3795 const SimplifyQuery &SQ;
3796
3797 /// Original Address.
3798 Value *Original;
3799
3800 /// Common value among addresses
3801 Value *CommonValue = nullptr;
3802
3803public:
3804 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3805 : SQ(_SQ), Original(OriginalValue) {}
3806
3807 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
3808
3809 /// Get the combined AddrMode
3810 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
3811
3812 /// Add a new AddrMode if it's compatible with the AddrModes we already
3813 /// have.
3814 /// \return True iff we succeeded in doing so.
3815 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3816 // Take note of whether we have any non-trivial AddrModes, as we need to detect
3817 // when all AddrModes are trivial as then we would introduce a phi or select
3818 // which just duplicates what's already there.
3819 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3820
3821 // If this is the first addrmode then everything is fine.
3822 if (AddrModes.empty()) {
3823 AddrModes.emplace_back(NewAddrMode);
3824 return true;
3825 }
3826
3827 // Figure out how different this is from the other address modes, which we
3828 // can do just by comparing against the first one given that we only care
3829 // about the cumulative difference.
3830 ExtAddrMode::FieldName ThisDifferentField =
3831 AddrModes[0].compare(NewAddrMode);
3832 if (DifferentField == ExtAddrMode::NoField)
3833 DifferentField = ThisDifferentField;
3834 else if (DifferentField != ThisDifferentField)
3835 DifferentField = ExtAddrMode::MultipleFields;
3836
3837 // If NewAddrMode differs in more than one dimension we cannot handle it.
3838 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3839
3840 // If Scale Field is different then we reject.
3841 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3842
3843 // We also must reject the case when the base offset is different and the
3844 // scaled register is not null: we cannot handle this case because the merge
3845 // of the different offsets would be used as the ScaleReg.
3846 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3847 !NewAddrMode.ScaledReg);
3848
3849 // We also must reject the case when the GV is different and a BaseReg is
3850 // installed, because we want to use the base register as a merge of the GV values.
3851 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3852 !NewAddrMode.HasBaseReg);
3853
3854 // Even if NewAddrMode is the same we still need to collect it, because the
3855 // original value is different. Later we will need all the original values
3856 // as anchors when finding the common Phi node.
3857 if (CanHandle)
3858 AddrModes.emplace_back(NewAddrMode);
3859 else
3860 AddrModes.clear();
3861
3862 return CanHandle;
3863 }
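// For example (cf. the sketch before findCommon below): if two incoming
// addresses are (%b1 + 40) and (%b2 + 40), the modes differ only in the base
// register, so DifferentField becomes BaseRegField and both modes are kept;
// combineAddrModes() may later merge them through a phi of %b1 and %b2.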
3864
3865 /// Combine the addressing modes we've collected into a single
3866 /// addressing mode.
3867 /// \return True iff we successfully combined them or we only had one so
3868 /// didn't need to combine them anyway.
3869 bool combineAddrModes() {
3870 // If we have no AddrModes then they can't be combined.
3871 if (AddrModes.size() == 0)
3872 return false;
3873
3874 // A single AddrMode can trivially be combined.
3875 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
3876 return true;
3877
3878 // If the AddrModes we collected are all just equal to the value they are
3879 // derived from then combining them wouldn't do anything useful.
3880 if (AllAddrModesTrivial)
3881 return false;
3882
3883 if (!addrModeCombiningAllowed())
3884 return false;
3885
3886 // Build a map from <original value, basic block where we saw it> to
3887 // value of base register.
3888 // Bail out if there is no common type.
3889 FoldAddrToValueMapping Map;
3890 if (!initializeMap(Map))
3891 return false;
3892
3893 CommonValue = findCommon(Map);
3894 if (CommonValue)
3895 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3896 return CommonValue != nullptr;
3897 }
3898
3899private:
3900 /// `CommonValue` may be a placeholder inserted by us.
3901 /// If the placeholder is not used, we should remove this dead instruction.
3902 void eraseCommonValueIfDead() {
3903 if (CommonValue && CommonValue->getNumUses() == 0)
3904 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
3905 CommonInst->eraseFromParent();
3906 }
3907
3908 /// Initialize Map with anchor values. For each address seen,
3909 /// we record the value of the differing field in that address.
3910 /// At the same time we find a common type for the differing fields, which we
3911 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
3912 /// Return false if there is no common type found.
3913 bool initializeMap(FoldAddrToValueMapping &Map) {
3914 // Keep track of keys where the value is null. We will need to replace it
3915 // with constant null when we know the common type.
3916 SmallVector<Value *, 2> NullValue;
3917 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3918 for (auto &AM : AddrModes) {
3919 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3920 if (DV) {
3921 auto *Type = DV->getType();
3922 if (CommonType && CommonType != Type)
3923 return false;
3924 CommonType = Type;
3925 Map[AM.OriginalValue] = DV;
3926 } else {
3927 NullValue.push_back(AM.OriginalValue);
3928 }
3929 }
3930 assert(CommonType && "At least one non-null value must be!");
3931 for (auto *V : NullValue)
3932 Map[V] = Constant::getNullValue(CommonType);
3933 return true;
3934 }
3935
3936 /// We have a mapping from value A to value B, where B was a field in the
3937 /// addressing mode represented by A. We also have an original value C
3938 /// representing the address we start with. Traversing from C through phis and
3939 /// selects, we ended up with the A's in the map. This utility function tries to
3940 /// find a value V which is a field in addressing mode C such that, traversing
3941 /// through phi nodes and selects, we end up at the corresponding B values in the map.
3942 /// The utility creates new Phi/Select nodes if needed.
3943 // The simple example looks as follows:
3944 // BB1:
3945 // p1 = b1 + 40
3946 // br cond BB2, BB3
3947 // BB2:
3948 // p2 = b2 + 40
3949 // br BB3
3950 // BB3:
3951 // p = phi [p1, BB1], [p2, BB2]
3952 // v = load p
3953 // Map is
3954 // p1 -> b1
3955 // p2 -> b2
3956 // Request is
3957 // p -> ?
3958 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3959 Value *findCommon(FoldAddrToValueMapping &Map) {
3960 // Tracks the simplification of newly created phi nodes. The reason we use
3961 // this mapping is that we will add newly created Phi nodes to AddrToBase.
3962 // Simplification of Phi nodes is recursive, so some Phi node may
3963 // be simplified after we added it to AddrToBase. In reality this
3964 // simplification is possible only if the original phis/selects were not
3965 // simplified yet.
3966 // Using this mapping we can find the current value in AddrToBase.
3967 SimplificationTracker ST(SQ);
3968
3969 // First step, DFS to create PHI nodes for all intermediate blocks.
3970 // Also fill traverse order for the second step.
3971 SmallVector<Value *, 32> TraverseOrder;
3972 InsertPlaceholders(Map, TraverseOrder, ST);
3973
3974 // Second Step, fill new nodes by merged values and simplify if possible.
3975 FillPlaceholders(Map, TraverseOrder, ST);
3976
3977 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
3978 ST.destroyNewNodes(CommonType);
3979 return nullptr;
3980 }
3981
3982 // Now we'd like to match the new Phi nodes to existing ones.
3983 unsigned PhiNotMatchedCount = 0;
3984 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
3985 ST.destroyNewNodes(CommonType);
3986 return nullptr;
3987 }
3988
3989 auto *Result = ST.Get(Map.find(Original)->second);
3990 if (Result) {
3991 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3992 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3993 }
3994 return Result;
3995 }
3996
3997 /// Try to match PHI node to Candidate.
3998 /// Matcher tracks the matched Phi nodes.
3999 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4000 SmallSetVector<PHIPair, 8> &Matcher,
4001 PhiNodeSet &PhiNodesToMatch) {
4002 SmallVector<PHIPair, 8> WorkList;
4003 Matcher.insert({PHI, Candidate});
4004 SmallSet<PHINode *, 8> MatchedPHIs;
4005 MatchedPHIs.insert(PHI);
4006 WorkList.push_back({PHI, Candidate});
4007 SmallSet<PHIPair, 8> Visited;
4008 while (!WorkList.empty()) {
4009 auto Item = WorkList.pop_back_val();
4010 if (!Visited.insert(Item).second)
4011 continue;
4012 // We iterate over all incoming values to Phi to compare them.
4013 // If the values are different, both of them are Phis, the first one is a
4014 // Phi we added (subject to match), and both of them are in the same basic
4015 // block, then we can match our pair if the values match. So we state that
4016 // these values match and add them to the work list to verify that.
4017 for (auto *B : Item.first->blocks()) {
4018 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4019 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4020 if (FirstValue == SecondValue)
4021 continue;
4022
4023 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4024 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4025
4026 // If one of them is not a Phi, or
4027 // the first one is not a Phi node from the set we'd like to match, or
4028 // the Phi nodes are from different basic blocks, then
4029 // we will not be able to match.
4030 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4031 FirstPhi->getParent() != SecondPhi->getParent())
4032 return false;
4033
4034 // If we already matched them then continue.
4035 if (Matcher.count({FirstPhi, SecondPhi}))
4036 continue;
4037 // So the values are different and do not match. So we need them to
4038 // match. (But we register no more than one match per PHI node, so that
4039 // we won't later try to replace them twice.)
4040 if (MatchedPHIs.insert(FirstPhi).second)
4041 Matcher.insert({FirstPhi, SecondPhi});
4042 // But we must check it.
4043 WorkList.push_back({FirstPhi, SecondPhi});
4044 }
4045 }
4046 return true;
4047 }
4048
4049 /// For the given set of PHI nodes (in the SimplificationTracker) try
4050 /// to find their equivalents.
4051 /// Returns false if this matching fails and creation of new Phi is disabled.
4052 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4053 unsigned &PhiNotMatchedCount) {
4054 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4055 // order, so the replacements (ReplacePhi) are also done in a deterministic
4056 // order.
4057 SmallSetVector<PHIPair, 8> Matched;
4058 SmallPtrSet<PHINode *, 8> WillNotMatch;
4059 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4060 while (PhiNodesToMatch.size()) {
4061 PHINode *PHI = *PhiNodesToMatch.begin();
4062
4063 // Add ourselves: if no Phi node in the basic block matches, we do not match.
4064 WillNotMatch.clear();
4065 WillNotMatch.insert(PHI);
4066
4067 // Traverse all Phis until we find an equivalent or fail to do so.
4068 bool IsMatched = false;
4069 for (auto &P : PHI->getParent()->phis()) {
4070 // Skip new Phi nodes.
4071 if (PhiNodesToMatch.count(&P))
4072 continue;
4073 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4074 break;
4075 // If it does not match, collect all Phi nodes from matcher.
4076 // If we end up with no match, then all these Phi nodes will not match
4077 // later.
4078 for (auto M : Matched)
4079 WillNotMatch.insert(M.first);
4080 Matched.clear();
4081 }
4082 if (IsMatched) {
4083 // Replace all matched values and erase them.
4084 for (auto MV : Matched)
4085 ST.ReplacePhi(MV.first, MV.second);
4086 Matched.clear();
4087 continue;
4088 }
4089 // If we are not allowed to create new nodes then bail out.
4090 if (!AllowNewPhiNodes)
4091 return false;
4092 // Just remove all seen values in matcher. They will not match anything.
4093 PhiNotMatchedCount += WillNotMatch.size();
4094 for (auto *P : WillNotMatch)
4095 PhiNodesToMatch.erase(P);
4096 }
4097 return true;
4098 }
4099 /// Fill the placeholders with values from predecessors and simplify them.
4100 void FillPlaceholders(FoldAddrToValueMapping &Map,
4101 SmallVectorImpl<Value *> &TraverseOrder,
4102 SimplificationTracker &ST) {
4103 while (!TraverseOrder.empty()) {
4104 Value *Current = TraverseOrder.pop_back_val();
4105 assert(Map.contains(Current) && "No node to fill!!!");
4106 Value *V = Map[Current];
4107
4108 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4109 // CurrentValue also must be Select.
4110 auto *CurrentSelect = cast<SelectInst>(Current);
4111 auto *TrueValue = CurrentSelect->getTrueValue();
4112 assert(Map.contains(TrueValue) && "No True Value!");
4113 Select->setTrueValue(ST.Get(Map[TrueValue]));
4114 auto *FalseValue = CurrentSelect->getFalseValue();
4115 assert(Map.contains(FalseValue) && "No False Value!");
4116 Select->setFalseValue(ST.Get(Map[FalseValue]));
4117 } else {
4118 // Must be a Phi node then.
4119 auto *PHI = cast<PHINode>(V);
4120 // Fill the Phi node with values from predecessors.
4121 for (auto *B : predecessors(PHI->getParent())) {
4122 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4123 assert(Map.contains(PV) && "No predecessor Value!");
4124 PHI->addIncoming(ST.Get(Map[PV]), B);
4125 }
4126 }
4127 Map[Current] = ST.Simplify(V);
4128 }
4129 }
4130
4131 /// Starting from the original value, recursively iterates over the def-use chain
4132 /// up to known ending values represented in a map. For each traversed phi/select,
4133 /// it inserts a placeholder Phi or Select.
4134 /// Reports all newly created Phi/Select nodes by adding them to the set.
4135 /// Also reports the order in which the values have been traversed.
4136 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4137 SmallVectorImpl<Value *> &TraverseOrder,
4138 SimplificationTracker &ST) {
4139 SmallVector<Value *, 32> Worklist;
4140 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4141 "Address must be a Phi or Select node");
4142 auto *Dummy = PoisonValue::get(CommonType);
4143 Worklist.push_back(Original);
4144 while (!Worklist.empty()) {
4145 Value *Current = Worklist.pop_back_val();
4146 // If it is already visited or it is an ending value, then skip it.
4147 if (Map.contains(Current))
4148 continue;
4149 TraverseOrder.push_back(Current);
4150
4151 // CurrentValue must be a Phi node or select. All others must be covered
4152 // by anchors.
4153 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4154 // Is it OK to get metadata from OrigSelect?!
4155 // Create a Select placeholder with dummy value.
4156 SelectInst *Select =
4157 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4158 CurrentSelect->getName(),
4159 CurrentSelect->getIterator(), CurrentSelect);
4160 Map[Current] = Select;
4161 ST.insertNewSelect(Select);
4162 // We are interested in True and False values.
4163 Worklist.push_back(CurrentSelect->getTrueValue());
4164 Worklist.push_back(CurrentSelect->getFalseValue());
4165 } else {
4166 // It must be a Phi node then.
4167 PHINode *CurrentPhi = cast<PHINode>(Current);
4168 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4169 PHINode *PHI =
4170 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4171 Map[Current] = PHI;
4172 ST.insertNewPhi(PHI);
4173 append_range(Worklist, CurrentPhi->incoming_values());
4174 }
4175 }
4176 }
4177
4178 bool addrModeCombiningAllowed() {
4179 if (DisableComplexAddrModes)
4180 return false;
4181 switch (DifferentField) {
4182 default:
4183 return false;
4184 case ExtAddrMode::BaseRegField:
4185 return AddrSinkCombineBaseReg;
4186 case ExtAddrMode::BaseGVField:
4187 return AddrSinkCombineBaseGV;
4188 case ExtAddrMode::BaseOffsField:
4189 return AddrSinkCombineBaseOffs;
4190 case ExtAddrMode::ScaledRegField:
4191 return AddrSinkCombineScaledReg;
4192 }
4193 }
4194};
4195} // end anonymous namespace
4196
4197/// Try adding ScaleReg*Scale to the current addressing mode.
4198/// Return true and update AddrMode if this addr mode is legal for the target,
4199/// false if not.
4200bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4201 unsigned Depth) {
4202 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4203 // mode. Just process that directly.
4204 if (Scale == 1)
4205 return matchAddr(ScaleReg, Depth);
4206
4207 // If the scale is 0, it takes nothing to add this.
4208 if (Scale == 0)
4209 return true;
4210
4211 // If we already have a scale of this value, we can add to it, otherwise, we
4212 // need an available scale field.
4213 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4214 return false;
4215
4216 ExtAddrMode TestAddrMode = AddrMode;
4217
4218 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4219 // [A+B + A*7] -> [B+A*8].
4220 TestAddrMode.Scale += Scale;
4221 TestAddrMode.ScaledReg = ScaleReg;
4222
4223 // If the new address isn't legal, bail out.
4224 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4225 return false;
4226
4227 // It was legal, so commit it.
4228 AddrMode = TestAddrMode;
4229
4230 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4231 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4232 // X*Scale + C*Scale to addr mode. If we find an available IV increment, do not
4233 // go any further: we can reuse it and cannot eliminate it.
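// Illustrative example: with Scale == 2, no scale already present, and
// ScaleReg == (add %x, 4), the code below retries the match with
// ScaledReg = %x and BaseOffs increased by 8.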
4234 ConstantInt *CI = nullptr;
4235 Value *AddLHS = nullptr;
4236 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4237 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4238 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4239 TestAddrMode.InBounds = false;
4240 TestAddrMode.ScaledReg = AddLHS;
4241 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4242
4243 // If this addressing mode is legal, commit it and remember that we folded
4244 // this instruction.
4245 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4246 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4247 AddrMode = TestAddrMode;
4248 return true;
4249 }
4250 // Restore status quo.
4251 TestAddrMode = AddrMode;
4252 }
4253
4254 // If this is an add recurrence with a constant step, return the increment
4255 // instruction and the canonicalized step.
4256 auto GetConstantStep =
4257 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4258 auto *PN = dyn_cast<PHINode>(V);
4259 if (!PN)
4260 return std::nullopt;
4261 auto IVInc = getIVIncrement(PN, &LI);
4262 if (!IVInc)
4263 return std::nullopt;
4264 // TODO: The result of the intrinsics above is two's complement. However, when
4265 // IV inc is expressed as add or sub, iv.next is potentially a poison value.
4266 // If it has nuw or nsw flags, we need to make sure that these flags are
4267 // inferrable at the point of the memory instruction. Otherwise we are replacing
4268 // a well-defined two's complement computation with poison. Currently, to avoid
4269 // potentially complex analysis needed to prove this, we reject such cases.
4270 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4271 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4272 return std::nullopt;
4273 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4274 return std::make_pair(IVInc->first, ConstantStep->getValue());
4275 return std::nullopt;
4276 };
4277
4278 // Try to account for the following special case:
4279 // 1. ScaleReg is an inductive variable;
4280 // 2. We use it with non-zero offset;
4281 // 3. IV's increment is available at the point of memory instruction.
4282 //
4283 // In this case, we may reuse the IV increment instead of the IV Phi to
4284 // achieve the following advantages:
4285 // 1. If IV step matches the offset, we will have no need in the offset;
4286 // 2. Even if they don't match, we will reduce the overlap of living IV
4287 // and IV increment, that will potentially lead to better register
4288 // assignment.
4289 if (AddrMode.BaseOffs) {
4290 if (auto IVStep = GetConstantStep(ScaleReg)) {
4291 Instruction *IVInc = IVStep->first;
4292 // The following assert is important to ensure a lack of infinite loops.
4293 // This transform is (intentionally) the inverse of the one just above.
4294 // If they don't agree on the definition of an increment, we'd alternate
4295 // back and forth indefinitely.
4296 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4297 APInt Step = IVStep->second;
4298 APInt Offset = Step * AddrMode.Scale;
4299 if (Offset.isSignedIntN(64)) {
4300 TestAddrMode.InBounds = false;
4301 TestAddrMode.ScaledReg = IVInc;
4302 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4303 // If this addressing mode is legal, commit it.
4304 // (Note that we defer the (expensive) domtree base legality check
4305 // to the very last possible point.)
4306 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4307 getDTFn().dominates(IVInc, MemoryInst)) {
4308 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4309 AddrMode = TestAddrMode;
4310 return true;
4311 }
4312 // Restore status quo.
4313 TestAddrMode = AddrMode;
4314 }
4315 }
4316 }
4317
4318 // Otherwise, just return what we have.
4319 return true;
4320}
4321
4322/// This is a little filter, which returns true if an addressing computation
4323/// involving I might be folded into a load/store accessing it.
4324/// This doesn't need to be perfect, but needs to accept at least
4325/// the set of instructions that MatchOperationAddr can.
4326 static bool MightBeFoldableInst(Instruction *I) {
4327 switch (I->getOpcode()) {
4328 case Instruction::BitCast:
4329 case Instruction::AddrSpaceCast:
4330 // Don't touch identity bitcasts.
4331 if (I->getType() == I->getOperand(0)->getType())
4332 return false;
4333 return I->getType()->isIntOrPtrTy();
4334 case Instruction::PtrToInt:
4335 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4336 return true;
4337 case Instruction::IntToPtr:
4338 // We know the input is intptr_t, so this is foldable.
4339 return true;
4340 case Instruction::Add:
4341 return true;
4342 case Instruction::Mul:
4343 case Instruction::Shl:
4344 // Can only handle X*C and X << C.
4345 return isa<ConstantInt>(I->getOperand(1));
4346 case Instruction::GetElementPtr:
4347 return true;
4348 default:
4349 return false;
4350 }
4351}
4352
4353/// Check whether or not \p Val is a legal instruction for \p TLI.
4354/// \note \p Val is assumed to be the product of some type promotion.
4355/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4356/// to be legal, as the non-promoted value would have had the same state.
4357 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4358 const DataLayout &DL, Value *Val) {
4359 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4360 if (!PromotedInst)
4361 return false;
4362 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4363 // If the ISDOpcode is undefined, it was undefined before the promotion.
4364 if (!ISDOpcode)
4365 return true;
4366 // Otherwise, check if the promoted instruction is legal or not.
4367 return TLI.isOperationLegalOrCustom(
4368 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4369}
4370
4371namespace {
4372
4373 /// Helper class to perform type promotion.
4374class TypePromotionHelper {
4375 /// Utility function to add a promoted instruction \p ExtOpnd to
4376 /// \p PromotedInsts and record the type of extension we have seen.
4377 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4378 Instruction *ExtOpnd, bool IsSExt) {
4379 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4380 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4381 if (It != PromotedInsts.end()) {
4382 // If the new extension is the same as the original, the information in
4383 // PromotedInsts[ExtOpnd] is still correct.
4384 if (It->second.getInt() == ExtTy)
4385 return;
4386
4387 // Now that the new extension is different from the old extension, we make
4388 // the type information invalid by setting extension type to
4389 // BothExtension.
4390 ExtTy = BothExtension;
4391 }
4392 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4393 }
4394
4395 /// Utility function to query the original type of instruction \p Opnd
4396 /// with a matched extension type. If the extension doesn't match, we
4397 /// cannot use the information we had on the original type.
4398 /// BothExtension doesn't match any extension type.
4399 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4400 Instruction *Opnd, bool IsSExt) {
4401 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4402 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4403 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4404 return It->second.getPointer();
4405 return nullptr;
4406 }
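// For example: if an instruction was first recorded with SignExtension and is
// later promoted again with ZeroExtension, addPromotedInst() downgrades its
// entry to BothExtension, and getOrigType() then returns nullptr for either
// query, so the cached original type is no longer used.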
4407
4408 /// Utility function to check whether or not a sign or zero extension
4409 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4410 /// either using the operands of \p Inst or promoting \p Inst.
4411 /// The type of the extension is defined by \p IsSExt.
4412 /// In other words, check if:
4413 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4414 /// #1 Promotion applies:
4415 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4416 /// #2 Operand reuses:
4417 /// ext opnd1 to ConsideredExtType.
4418 /// \p PromotedInsts maps the instructions to their type before promotion.
4419 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4420 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4421
4422 /// Utility function to determine if \p OpIdx should be promoted when
4423 /// promoting \p Inst.
4424 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4425 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4426 }
4427
4428 /// Utility function to promote the operand of \p Ext when this
4429 /// operand is a promotable trunc or sext or zext.
4430 /// \p PromotedInsts maps the instructions to their type before promotion.
4431 /// \p CreatedInstsCost[out] contains the cost of all instructions
4432 /// created to promote the operand of Ext.
4433 /// Newly added extensions are inserted in \p Exts.
4434 /// Newly added truncates are inserted in \p Truncs.
4435 /// Should never be called directly.
4436 /// \return The promoted value which is used instead of Ext.
4437 static Value *promoteOperandForTruncAndAnyExt(
4438 Instruction *Ext, TypePromotionTransaction &TPT,
4439 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4440 SmallVectorImpl<Instruction *> *Exts,
4441 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4442
4443 /// Utility function to promote the operand of \p Ext when this
4444 /// operand is promotable and is not a supported trunc or sext.
4445 /// \p PromotedInsts maps the instructions to their type before promotion.
4446 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4447 /// created to promote the operand of Ext.
4448 /// Newly added extensions are inserted in \p Exts.
4449 /// Newly added truncates are inserted in \p Truncs.
4450 /// Should never be called directly.
4451 /// \return The promoted value which is used instead of Ext.
4452 static Value *promoteOperandForOther(Instruction *Ext,
4453 TypePromotionTransaction &TPT,
4454 InstrToOrigTy &PromotedInsts,
4455 unsigned &CreatedInstsCost,
4456 SmallVectorImpl<Instruction *> *Exts,
4457 SmallVectorImpl<Instruction *> *Truncs,
4458 const TargetLowering &TLI, bool IsSExt);
4459
4460 /// \see promoteOperandForOther.
4461 static Value *signExtendOperandForOther(
4462 Instruction *Ext, TypePromotionTransaction &TPT,
4463 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4464 SmallVectorImpl<Instruction *> *Exts,
4465 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4466 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4467 Exts, Truncs, TLI, true);
4468 }
4469
4470 /// \see promoteOperandForOther.
4471 static Value *zeroExtendOperandForOther(
4472 Instruction *Ext, TypePromotionTransaction &TPT,
4473 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4474 SmallVectorImpl<Instruction *> *Exts,
4475 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4476 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4477 Exts, Truncs, TLI, false);
4478 }
4479
4480public:
4481 /// Type for the utility function that promotes the operand of Ext.
4482 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4483 InstrToOrigTy &PromotedInsts,
4484 unsigned &CreatedInstsCost,
4485 SmallVectorImpl<Instruction *> *Exts,
4486 SmallVectorImpl<Instruction *> *Truncs,
4487 const TargetLowering &TLI);
4488
4489 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4490 /// action to promote the operand of \p Ext instead of using Ext.
4491 /// \return NULL if no promotable action is possible with the current
4492 /// sign extension.
4493 /// \p InsertedInsts keeps track of all the instructions inserted by the
4494 /// other CodeGenPrepare optimizations. This information is important
4495 /// because we do not want to promote these instructions as CodeGenPrepare
4496 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4497 /// \p PromotedInsts maps the instructions to their type before promotion.
4498 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4499 const TargetLowering &TLI,
4500 const InstrToOrigTy &PromotedInsts);
4501};
4502
4503} // end anonymous namespace
4504
4505bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4506 Type *ConsideredExtType,
4507 const InstrToOrigTy &PromotedInsts,
4508 bool IsSExt) {
4509 // The promotion helper does not know how to deal with vector types yet.
4510 // To be able to fix that, we would need to fix the places where we
4511 // statically extend, e.g., constants and such.
4512 if (Inst->getType()->isVectorTy())
4513 return false;
4514
4515 // We can always get through zext.
4516 if (isa<ZExtInst>(Inst))
4517 return true;
4518
4519 // sext(sext) is ok too.
4520 if (IsSExt && isa<SExtInst>(Inst))
4521 return true;
4522
4523 // We can get through a binary operator if it is legal. In other words, the
4524 // binary operator must have a nuw or nsw flag.
4525 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4526 if (isa<OverflowingBinaryOperator>(BinOp) &&
4527 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4528 (IsSExt && BinOp->hasNoSignedWrap())))
4529 return true;
4530
4531 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4532 if ((Inst->getOpcode() == Instruction::And ||
4533 Inst->getOpcode() == Instruction::Or))
4534 return true;
4535
4536 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4537 if (Inst->getOpcode() == Instruction::Xor) {
4538 // Make sure it is not a NOT.
4539 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4540 if (!Cst->getValue().isAllOnes())
4541 return true;
4542 }
4543
4544 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4545 // It may change a poisoned value into a regular value, like
4546 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4547 // poisoned value regular value
4548 // It should be OK since undef covers valid value.
4549 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4550 return true;
4551
4552 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4553 // It may change a poisoned value into a regular value, like
4554 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4555 // poisoned value regular value
4556 // It should be OK since undef covers valid value.
4557 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4558 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4559 if (ExtInst->hasOneUse()) {
4560 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4561 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4562 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4563 if (Cst &&
4564 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4565 return true;
4566 }
4567 }
4568 }
4569
4570 // Check if we can do the following simplification.
4571 // ext(trunc(opnd)) --> ext(opnd)
4572 if (!isa<TruncInst>(Inst))
4573 return false;
4574
4575 Value *OpndVal = Inst->getOperand(0);
4576 // Check if we can use this operand in the extension.
4577 // If the type is larger than the result type of the extension, we cannot.
4578 if (!OpndVal->getType()->isIntegerTy() ||
4579 OpndVal->getType()->getIntegerBitWidth() >
4580 ConsideredExtType->getIntegerBitWidth())
4581 return false;
4582
4583 // If the operand of the truncate is not an instruction, we will not have
4584 // any information on the dropped bits.
4585 // (Actually we could for constant but it is not worth the extra logic).
4586 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4587 if (!Opnd)
4588 return false;
4589
4590 // Check if the source of the type is narrow enough.
4591 // I.e., check that trunc just drops extended bits of the same kind as
4592 // the extension.
4593 // #1 get the type of the operand and check the kind of the extended bits.
4594 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4595 if (OpndType)
4596 ;
4597 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4598 OpndType = Opnd->getOperand(0)->getType();
4599 else
4600 return false;
4601
4602 // #2 check that the truncate just drops extended bits.
4603 return Inst->getType()->getIntegerBitWidth() >=
4604 OpndType->getIntegerBitWidth();
4605}
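// Illustrative instance of the trunc case above: given
//   %w = sext i8 %v to i32
//   %t = trunc i32 %w to i16
//   %e = sext i16 %t to i64
// the truncate only drops bits that were sign extended from %v (i16 is no
// narrower than the original i8), so the promotion may look through it.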
4606
4607TypePromotionHelper::Action TypePromotionHelper::getAction(
4608 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4609 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4610 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4611 "Unexpected instruction type");
4612 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4613 Type *ExtTy = Ext->getType();
4614 bool IsSExt = isa<SExtInst>(Ext);
4615 // If the operand of the extension is not an instruction, we cannot
4616 // get through.
4617 // If it is, check whether we can get through it.
4618 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4619 return nullptr;
4620
4621 // Do not promote if the operand has been added by codegenprepare.
4622 // Otherwise, it means we are undoing an optimization that is likely to be
4623 // redone, thus causing a potential infinite loop.
4624 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4625 return nullptr;
4626
4627 // SExt or Trunc instructions.
4628 // Return the related handler.
4629 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4630 isa<ZExtInst>(ExtOpnd))
4631 return promoteOperandForTruncAndAnyExt;
4632
4633 // Regular instruction.
4634 // Abort early if we will have to insert non-free instructions.
4635 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4636 return nullptr;
4637 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4638}
4639
4640Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4641 Instruction *SExt, TypePromotionTransaction &TPT,
4642 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4643 SmallVectorImpl<Instruction *> *Exts,
4644 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4645 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4646 // get through it and this method should not be called.
4647 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4648 Value *ExtVal = SExt;
4649 bool HasMergedNonFreeExt = false;
4650 if (isa<ZExtInst>(SExtOpnd)) {
4651 // Replace s|zext(zext(opnd))
4652 // => zext(opnd).
4653 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4654 Value *ZExt =
4655 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4656 TPT.replaceAllUsesWith(SExt, ZExt);
4657 TPT.eraseInstruction(SExt);
4658 ExtVal = ZExt;
4659 } else {
4660 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4661 // => z|sext(opnd).
4662 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4663 }
4664 CreatedInstsCost = 0;
4665
4666 // Remove dead code.
4667 if (SExtOpnd->use_empty())
4668 TPT.eraseInstruction(SExtOpnd);
4669
4670 // Check if the extension is still needed.
4671 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4672 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4673 if (ExtInst) {
4674 if (Exts)
4675 Exts->push_back(ExtInst);
4676 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4677 }
4678 return ExtVal;
4679 }
4680
4681 // At this point we have: ext ty opnd to ty.
4682 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4683 Value *NextVal = ExtInst->getOperand(0);
4684 TPT.eraseInstruction(ExtInst, NextVal);
4685 return NextVal;
4686}
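// Illustrative sketch (IR names invented) of the rewrite performed above:
//   %z = zext i8 %a to i16
//   %s = sext i16 %z to i32
// becomes
//   %s = zext i8 %a to i32
// and, if the intermediate extension has no remaining users, it is erased.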
4687
4688Value *TypePromotionHelper::promoteOperandForOther(
4689 Instruction *Ext, TypePromotionTransaction &TPT,
4690 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4691 SmallVectorImpl<Instruction *> *Exts,
4692 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4693 bool IsSExt) {
4694 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4695 // get through it and this method should not be called.
4696 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4697 CreatedInstsCost = 0;
4698 if (!ExtOpnd->hasOneUse()) {
4699 // ExtOpnd will be promoted.
4700 // All its uses, but Ext, will need to use a truncated value of the
4701 // promoted version.
4702 // Create the truncate now.
4703 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4704 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4705 // Insert it just after the definition.
4706 ITrunc->moveAfter(ExtOpnd);
4707 if (Truncs)
4708 Truncs->push_back(ITrunc);
4709 }
4710
4711 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4712 // Restore the operand of Ext (which has been replaced by the previous call
4713 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
4714 TPT.setOperand(Ext, 0, ExtOpnd);
4715 }
4716
4717 // Get through the Instruction:
4718 // 1. Update its type.
4719 // 2. Replace the uses of Ext by Inst.
4720 // 3. Extend each operand that needs to be extended.
4721
4722 // Remember the original type of the instruction before promotion.
4723 // This is useful to know that the high bits are sign extended bits.
4724 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4725 // Step #1.
4726 TPT.mutateType(ExtOpnd, Ext->getType());
4727 // Step #2.
4728 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4729 // Step #3.
4730 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
4731 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
4732 ++OpIdx) {
4733 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
4734 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
4735 !shouldExtOperand(ExtOpnd, OpIdx)) {
4736 LLVM_DEBUG(dbgs() << "No need to propagate\n");
4737 continue;
4738 }
4739 // Check if we can statically extend the operand.
4740 Value *Opnd = ExtOpnd->getOperand(OpIdx);
4741 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4742 LLVM_DEBUG(dbgs() << "Statically extend\n");
4743 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
4744 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
4745 : Cst->getValue().zext(BitWidth);
4746 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
4747 continue;
4748 }
4749 // UndefValues are typed, so we have to statically extend them.
4750 if (isa<UndefValue>(Opnd)) {
4751 LLVM_DEBUG(dbgs() << "Statically extend\n");
4752 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
4753 continue;
4754 }
4755
4756 // Otherwise we have to explicitly sign extend the operand.
4757 Value *ValForExtOpnd = IsSExt
4758 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
4759 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
4760 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4761 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
4762 if (!InstForExtOpnd)
4763 continue;
4764
4765 if (Exts)
4766 Exts->push_back(InstForExtOpnd);
4767
4768 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
4769 }
4770 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
4771 TPT.eraseInstruction(Ext);
4772 return ExtOpnd;
4773}
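// Illustrative sketch (IR names invented) of the promotion performed above:
//   %add = add i32 %opnd, 1            ; %add also has a non-ext user
//   %ext = sext i32 %add to i64
// becomes, roughly:
//   %promoted = sext i32 %opnd to i64
//   %add      = add i64 %promoted, 1
//   %trunc    = trunc i64 %add to i32  ; feeds the other (non-ext) users
// and %ext is erased, its users now using the widened %add directly.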
4774
4775/// Check whether or not promoting an instruction to a wider type is profitable.
4776/// \p NewCost gives the cost of extension instructions created by the
4777/// promotion.
4778/// \p OldCost gives the cost of extension instructions before the promotion
4779/// plus the number of instructions that have been
4780 /// matched in the addressing mode by the promotion.
4781/// \p PromotedOperand is the value that has been promoted.
4782/// \return True if the promotion is profitable, false otherwise.
4783bool AddressingModeMatcher::isPromotionProfitable(
4784 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4785 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4786 << '\n');
4787 // The cost of the new extensions is greater than the cost of the
4788 // old extension plus what we folded.
4789 // This is not profitable.
4790 if (NewCost > OldCost)
4791 return false;
4792 if (NewCost < OldCost)
4793 return true;
4794 // The promotion is neutral but it may help folding the sign extension in
4795 // loads for instance.
4796 // Check that we did not create an illegal instruction.
4797 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4798}
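// Worked example (hypothetical numbers): if promoting an operand creates two
// new extensions (NewCost = 2) while removing one extension and folding one
// instruction into the addressing mode (OldCost = 1 + 1 = 2), the promotion is
// cost-neutral and is kept only if the promoted instruction is still legal for
// the target.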
4799
4800/// Given an instruction or constant expr, see if we can fold the operation
4801/// into the addressing mode. If so, update the addressing mode and return
4802/// true, otherwise return false without modifying AddrMode.
4803/// If \p MovedAway is not NULL, it contains the information of whether or
4804/// not AddrInst has to be folded into the addressing mode on success.
4805 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
4806/// because it has been moved away.
4807/// Thus AddrInst must not be added in the matched instructions.
4808/// This state can happen when AddrInst is a sext, since it may be moved away.
4809/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4810/// not be referenced anymore.
4811bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4812 unsigned Depth,
4813 bool *MovedAway) {
4814 // Avoid exponential behavior on extremely deep expression trees.
4815 if (Depth >= 5)
4816 return false;
4817
4818 // By default, all matched instructions stay in place.
4819 if (MovedAway)
4820 *MovedAway = false;
4821
4822 switch (Opcode) {
4823 case Instruction::PtrToInt:
4824 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4825 return matchAddr(AddrInst->getOperand(0), Depth);
4826 case Instruction::IntToPtr: {
4827 auto AS = AddrInst->getType()->getPointerAddressSpace();
4828 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4829 // This inttoptr is a no-op if the integer type is pointer sized.
4830 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4831 return matchAddr(AddrInst->getOperand(0), Depth);
4832 return false;
4833 }
4834 case Instruction::BitCast:
4835 // BitCast is always a noop, and we can handle it as long as it is
4836 // int->int or pointer->pointer (we don't want int<->fp or something).
4837 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4838 // Don't touch identity bitcasts. These were probably put here by LSR,
4839 // and we don't want to mess around with them. Assume it knows what it
4840 // is doing.
4841 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4842 return matchAddr(AddrInst->getOperand(0), Depth);
4843 return false;
4844 case Instruction::AddrSpaceCast: {
4845 unsigned SrcAS =
4846 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4847 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4848 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
4849 return matchAddr(AddrInst->getOperand(0), Depth);
4850 return false;
4851 }
4852 case Instruction::Add: {
4853 // Check to see if we can merge in one operand, then the other. If so, we
4854 // win.
4855 ExtAddrMode BackupAddrMode = AddrMode;
4856 unsigned OldSize = AddrModeInsts.size();
4857 // Start a transaction at this point.
4858 // The LHS may match but not the RHS.
4859 // Therefore, we need a higher level restoration point to undo partially
4860 // matched operation.
4861 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4862 TPT.getRestorationPoint();
4863
4864 // Try to match an integer constant second to increase its chance of ending
4865 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
4866 int First = 0, Second = 1;
4867 if (isa<ConstantInt>(AddrInst->getOperand(First))
4868 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
4869 std::swap(First, Second);
4870 AddrMode.InBounds = false;
4871 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
4872 matchAddr(AddrInst->getOperand(Second), Depth + 1))
4873 return true;
4874
4875 // Restore the old addr mode info.
4876 AddrMode = BackupAddrMode;
4877 AddrModeInsts.resize(OldSize);
4878 TPT.rollback(LastKnownGood);
4879
4880 // Otherwise this was over-aggressive. Try merging operands in the opposite
4881 // order.
4882 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
4883 matchAddr(AddrInst->getOperand(First), Depth + 1))
4884 return true;
4885
4886 // Otherwise we definitely can't merge the ADD in.
4887 AddrMode = BackupAddrMode;
4888 AddrModeInsts.resize(OldSize);
4889 TPT.rollback(LastKnownGood);
4890 break;
4891 }
4892 // case Instruction::Or:
4893 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
4894 // break;
4895 case Instruction::Mul:
4896 case Instruction::Shl: {
4897 // Can only handle X*C and X << C.
4898 AddrMode.InBounds = false;
4899 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
4900 if (!RHS || RHS->getBitWidth() > 64)
4901 return false;
4902 int64_t Scale = Opcode == Instruction::Shl
4903 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
4904 : RHS->getSExtValue();
4905
4906 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
4907 }
4908 case Instruction::GetElementPtr: {
4909 // Scan the GEP. We check whether it contains constant offsets and at most
4910 // one variable offset.
4911 int VariableOperand = -1;
4912 unsigned VariableScale = 0;
4913
4914 int64_t ConstantOffset = 0;
4915 gep_type_iterator GTI = gep_type_begin(AddrInst);
4916 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
4917 if (StructType *STy = GTI.getStructTypeOrNull()) {
4918 const StructLayout *SL = DL.getStructLayout(STy);
4919 unsigned Idx =
4920 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
4921 ConstantOffset += SL->getElementOffset(Idx);
4922 } else {
4923 TypeSize TS = GTI.getSequentialElementStride(DL);
4924 if (TS.isNonZero()) {
4925 // The optimisations below currently only work for fixed offsets.
4926 if (TS.isScalable())
4927 return false;
4928 int64_t TypeSize = TS.getFixedValue();
4929 if (ConstantInt *CI =
4930 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
4931 const APInt &CVal = CI->getValue();
4932 if (CVal.getSignificantBits() <= 64) {
4933 ConstantOffset += CVal.getSExtValue() * TypeSize;
4934 continue;
4935 }
4936 }
4937 // We only allow one variable index at the moment.
4938 if (VariableOperand != -1)
4939 return false;
4940
4941 // Remember the variable index.
4942 VariableOperand = i;
4943 VariableScale = TypeSize;
4944 }
4945 }
4946 }
4947
4948 // A common case is for the GEP to only do a constant offset. In this case,
4949 // just add it to the disp field and check validity.
4950 if (VariableOperand == -1) {
4951 AddrMode.BaseOffs += ConstantOffset;
4952 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
4953 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4954 AddrMode.InBounds = false;
4955 return true;
4956 }
4957 AddrMode.BaseOffs -= ConstantOffset;
4958
4959 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4960 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4961 ConstantOffset > 0) {
4962 // Record GEPs with non-zero offsets as candidates for splitting in
4963 // the event that the offset cannot fit into the r+i addressing mode.
4964 // Simple and common case that only one GEP is used in calculating the
4965 // address for the memory access.
4966 Value *Base = AddrInst->getOperand(0);
4967 auto *BaseI = dyn_cast<Instruction>(Base);
4968 auto *GEP = cast<GetElementPtrInst>(AddrInst);
4969 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4970 (BaseI && !isa<CastInst>(BaseI) &&
4971 !isa<GetElementPtrInst>(BaseI))) {
4972 // Make sure the parent block allows inserting non-PHI instructions
4973 // before the terminator.
4974 BasicBlock *Parent = BaseI ? BaseI->getParent()
4975 : &GEP->getFunction()->getEntryBlock();
4976 if (!Parent->getTerminator()->isEHPad())
4977 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4978 }
4979 }
4980
4981 return false;
4982 }
4983
4984 // Save the valid addressing mode in case we can't match.
4985 ExtAddrMode BackupAddrMode = AddrMode;
4986 unsigned OldSize = AddrModeInsts.size();
4987
4988 // See if the scale and offset amount is valid for this target.
4989 AddrMode.BaseOffs += ConstantOffset;
4990 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4991 AddrMode.InBounds = false;
4992
4993 // Match the base operand of the GEP.
4994 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
4995 // If it couldn't be matched, just stuff the value in a register.
4996 if (AddrMode.HasBaseReg) {
4997 AddrMode = BackupAddrMode;
4998 AddrModeInsts.resize(OldSize);
4999 return false;
5000 }
5001 AddrMode.HasBaseReg = true;
5002 AddrMode.BaseReg = AddrInst->getOperand(0);
5003 }
5004
5005 // Match the remaining variable portion of the GEP.
5006 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5007 Depth)) {
5008 // If it couldn't be matched, try stuffing the base into a register
5009 // instead of matching it, and retrying the match of the scale.
5010 AddrMode = BackupAddrMode;
5011 AddrModeInsts.resize(OldSize);
5012 if (AddrMode.HasBaseReg)
5013 return false;
5014 AddrMode.HasBaseReg = true;
5015 AddrMode.BaseReg = AddrInst->getOperand(0);
5016 AddrMode.BaseOffs += ConstantOffset;
5017 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5018 VariableScale, Depth)) {
5019 // If even that didn't work, bail.
5020 AddrMode = BackupAddrMode;
5021 AddrModeInsts.resize(OldSize);
5022 return false;
5023 }
5024 }
5025
5026 return true;
5027 }
5028 case Instruction::SExt:
5029 case Instruction::ZExt: {
5030 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5031 if (!Ext)
5032 return false;
5033
5034 // Try to move this ext out of the way of the addressing mode.
5035 // Ask for a method for doing so.
5036 TypePromotionHelper::Action TPH =
5037 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5038 if (!TPH)
5039 return false;
5040
5041 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5042 TPT.getRestorationPoint();
5043 unsigned CreatedInstsCost = 0;
5044 unsigned ExtCost = !TLI.isExtFree(Ext);
5045 Value *PromotedOperand =
5046 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5047 // SExt has been moved away.
5048 // Thus either it will be rematched later in the recursive calls or it is
5049 // gone. Anyway, we must not fold it into the addressing mode at this point.
5050 // E.g.,
5051 // op = add opnd, 1
5052 // idx = ext op
5053 // addr = gep base, idx
5054 // is now:
5055 // promotedOpnd = ext opnd <- no match here
5056 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5057 // addr = gep base, op <- match
5058 if (MovedAway)
5059 *MovedAway = true;
5060
5061 assert(PromotedOperand &&
5062 "TypePromotionHelper should have filtered out those cases");
5063
5064 ExtAddrMode BackupAddrMode = AddrMode;
5065 unsigned OldSize = AddrModeInsts.size();
5066
5067 if (!matchAddr(PromotedOperand, Depth) ||
5068 // The total of the new cost is equal to the cost of the created
5069 // instructions.
5070 // The total of the old cost is equal to the cost of the extension plus
5071 // what we have saved in the addressing mode.
5072 !isPromotionProfitable(CreatedInstsCost,
5073 ExtCost + (AddrModeInsts.size() - OldSize),
5074 PromotedOperand)) {
5075 AddrMode = BackupAddrMode;
5076 AddrModeInsts.resize(OldSize);
5077 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5078 TPT.rollback(LastKnownGood);
5079 return false;
5080 }
5081 return true;
5082 }
5083 case Instruction::Call:
5084 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5085 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5086 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5087 if (TLI.addressingModeSupportsTLS(GV))
5088 return matchAddr(AddrInst->getOperand(0), Depth);
5089 }
5090 }
5091 break;
5092 }
5093 return false;
5094}
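// Illustrative sketch (IR names invented): matching the GetElementPtr case
// above on
//   %addr = getelementptr inbounds i32, ptr %base, i64 %i
// typically yields an ExtAddrMode of the form
//   BaseReg = %base, ScaledReg = %i, Scale = 4, BaseOffs = 0
// provided TLI reports reg + 4*reg as a legal addressing mode for the access.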
5095
5096/// If we can, try to add the value of 'Addr' into the current addressing mode.
5097/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5098/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5099/// for the target.
5100///
5101bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5102 // Start a transaction at this point that we will rollback if the matching
5103 // fails.
5104 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5105 TPT.getRestorationPoint();
5106 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5107 if (CI->getValue().isSignedIntN(64)) {
5108 // Fold in immediates if legal for the target.
5109 AddrMode.BaseOffs += CI->getSExtValue();
5110 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5111 return true;
5112 AddrMode.BaseOffs -= CI->getSExtValue();
5113 }
5114 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5115 // If this is a global variable, try to fold it into the addressing mode.
5116 if (!AddrMode.BaseGV) {
5117 AddrMode.BaseGV = GV;
5118 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5119 return true;
5120 AddrMode.BaseGV = nullptr;
5121 }
5122 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5123 ExtAddrMode BackupAddrMode = AddrMode;
5124 unsigned OldSize = AddrModeInsts.size();
5125
5126 // Check to see if it is possible to fold this operation.
5127 bool MovedAway = false;
5128 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5129 // This instruction may have been moved away. If so, there is nothing
5130 // to check here.
5131 if (MovedAway)
5132 return true;
5133 // Okay, it's possible to fold this. Check to see if it is actually
5134 // *profitable* to do so. We use a simple cost model to avoid increasing
5135 // register pressure too much.
5136 if (I->hasOneUse() ||
5137 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5138 AddrModeInsts.push_back(I);
5139 return true;
5140 }
5141
5142 // It isn't profitable to do this, roll back.
5143 AddrMode = BackupAddrMode;
5144 AddrModeInsts.resize(OldSize);
5145 TPT.rollback(LastKnownGood);
5146 }
5147 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5148 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5149 return true;
5150 TPT.rollback(LastKnownGood);
5151 } else if (isa<ConstantPointerNull>(Addr)) {
5152 // Null pointer gets folded without affecting the addressing mode.
5153 return true;
5154 }
5155
5156 // Worst case, the target should support [reg] addressing modes. :)
5157 if (!AddrMode.HasBaseReg) {
5158 AddrMode.HasBaseReg = true;
5159 AddrMode.BaseReg = Addr;
5160 // Still check for legality in case the target supports [imm] but not [i+r].
5161 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5162 return true;
5163 AddrMode.HasBaseReg = false;
5164 AddrMode.BaseReg = nullptr;
5165 }
5166
5167 // If the base register is already taken, see if we can do [r+r].
5168 if (AddrMode.Scale == 0) {
5169 AddrMode.Scale = 1;
5170 AddrMode.ScaledReg = Addr;
5171 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5172 return true;
5173 AddrMode.Scale = 0;
5174 AddrMode.ScaledReg = nullptr;
5175 }
5176 // Couldn't match.
5177 TPT.rollback(LastKnownGood);
5178 return false;
5179}
5180
5181/// Check to see if all uses of OpVal by the specified inline asm call are due
5182/// to memory operands. If so, return true, otherwise return false.
5183 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5184 const TargetLowering &TLI,
5185 const TargetRegisterInfo &TRI) {
5186 const Function *F = CI->getFunction();
5187 TargetLowering::AsmOperandInfoVector TargetConstraints =
5188 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
5189
5190 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5191 // Compute the constraint code and ConstraintType to use.
5192 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5193
5194 // If this asm operand is our Value*, and if it isn't an indirect memory
5195 // operand, we can't fold it! TODO: Also handle C_Address?
5196 if (OpInfo.CallOperandVal == OpVal &&
5197 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5198 !OpInfo.isIndirect))
5199 return false;
5200 }
5201
5202 return true;
5203}
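// Illustrative sketch (IR names invented): for an inline asm call such as
//   call void asm sideeffect "movl $$42, $0", "=*m"(ptr elementtype(i32) %p)
// the use of %p is an indirect memory operand, so the routine above returns
// true and the address computation feeding %p may be folded or sunk.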
5204
5205/// Recursively walk all the uses of I until we find a memory use.
5206/// If we find an obviously non-foldable instruction, return true.
5207/// Add accessed addresses and types to MemoryUses.
5208 static bool FindAllMemoryUses(
5209 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5210 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5211 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5212 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5213 // If we already considered this instruction, we're done.
5214 if (!ConsideredInsts.insert(I).second)
5215 return false;
5216
5217 // If this is an obviously unfoldable instruction, bail out.
5218 if (!MightBeFoldableInst(I))
5219 return true;
5220
5221 // Loop over all the uses, recursively processing them.
5222 for (Use &U : I->uses()) {
5223 // Conservatively return true if we're seeing a large number or a deep chain
5224 // of users. This avoids excessive compilation times in pathological cases.
5225 if (SeenInsts++ >= MaxAddressUsersToScan)
5226 return true;
5227
5228 Instruction *UserI = cast<Instruction>(U.getUser());
5229 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5230 MemoryUses.push_back({&U, LI->getType()});
5231 continue;
5232 }
5233
5234 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5235 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5236 return true; // Storing addr, not into addr.
5237 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5238 continue;
5239 }
5240
5241 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5242 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5243 return true; // Storing addr, not into addr.
5244 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5245 continue;
5246 }
5247
5248 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5249 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5250 return true; // Storing addr, not into addr.
5251 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5252 continue;
5253 }
5254
5255 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5256 if (CI->hasFnAttr(Attribute::Cold)) {
5257 // If this is a cold call, we can sink the addressing calculation into
5258 // the cold path. See optimizeCallInst
5259 bool OptForSize =
5260 OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
5261 if (!OptForSize)
5262 continue;
5263 }
5264
5265 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5266 if (!IA)
5267 return true;
5268
5269 // If this is a memory operand, we're cool, otherwise bail out.
5270 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5271 return true;
5272 continue;
5273 }
5274
5275 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5276 PSI, BFI, SeenInsts))
5277 return true;
5278 }
5279
5280 return false;
5281}
5282
5283 static bool FindAllMemoryUses(
5284 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5285 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5286 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5287 unsigned SeenInsts = 0;
5288 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5289 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5290 PSI, BFI, SeenInsts);
5291}
5292
5293
5294/// Return true if Val is already known to be live at the use site that we're
5295/// folding it into. If so, there is no cost to include it in the addressing
5296/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5297/// instruction already.
5298bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5299 Value *KnownLive1,
5300 Value *KnownLive2) {
5301 // If Val is either of the known-live values, we know it is live!
5302 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5303 return true;
5304
5305 // All values other than instructions and arguments (e.g. constants) are live.
5306 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5307 return true;
5308
5309 // If Val is a constant-sized alloca in the entry block, it is live; this is
5310 // true because it is just a reference to the stack/frame pointer, which is
5311 // live for the whole function.
5312 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5313 if (AI->isStaticAlloca())
5314 return true;
5315
5316 // Check to see if this value is already used in the memory instruction's
5317 // block. If so, it's already live into the block at the very least, so we
5318 // can reasonably fold it.
5319 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5320}
5321
5322/// It is possible for the addressing mode of the machine to fold the specified
5323/// instruction into a load or store that ultimately uses it.
5324/// However, the specified instruction has multiple uses.
5325/// Given this, it may actually increase register pressure to fold it
5326/// into the load. For example, consider this code:
5327///
5328/// X = ...
5329/// Y = X+1
5330/// use(Y) -> nonload/store
5331/// Z = Y+1
5332/// load Z
5333///
5334/// In this case, Y has multiple uses, and can be folded into the load of Z
5335/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5336/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5337/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5338/// number of computations either.
5339///
5340/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5341/// X was live across 'load Z' for other reasons, we actually *would* want to
5342/// fold the addressing mode in the Z case. This would make Y die earlier.
5343bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5344 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5345 if (IgnoreProfitability)
5346 return true;
5347
5348 // AMBefore is the addressing mode before this instruction was folded into it,
5349 // and AMAfter is the addressing mode after the instruction was folded. Get
5350 // the set of registers referenced by AMAfter and subtract out those
5351 // referenced by AMBefore: this is the set of values which folding in this
5352 // address extends the lifetime of.
5353 //
5354 // Note that there are only two potential values being referenced here,
5355 // BaseReg and ScaleReg (global addresses are always available, as are any
5356 // folded immediates).
5357 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5358
5359 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5360 // lifetime wasn't extended by adding this instruction.
5361 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5362 BaseReg = nullptr;
5363 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5364 ScaledReg = nullptr;
5365
5366 // If folding this instruction (and its subexprs) didn't extend any live
5367 // ranges, we're ok with it.
5368 if (!BaseReg && !ScaledReg)
5369 return true;
5370
5371 // If all uses of this instruction can have the address mode sunk into them,
5372 // we can remove the addressing mode and effectively trade one live register
5373 // for another (at worst.) In this context, folding an addressing mode into
5374 // the use is just a particularly nice way of sinking it.
5375 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5376 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5377 return false; // Has a non-memory, non-foldable use!
5378
5379 // Now that we know that all uses of this instruction are part of a chain of
5380 // computation involving only operations that could theoretically be folded
5381 // into a memory use, loop over each of these memory operation uses and see
5382 // if they could *actually* fold the instruction. The assumption is that
5383 // addressing modes are cheap and that duplicating the computation involved
5384 // many times is worthwhile, even on a fastpath. For sinking candidates
5385 // (i.e. cold call sites), this serves as a way to prevent excessive code
5386 // growth since most architectures have some reasonable small and fast way to
5387 // compute an effective address. (i.e LEA on x86)
5388 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5389 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5390 Value *Address = Pair.first->get();
5391 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5392 Type *AddressAccessTy = Pair.second;
5393 unsigned AS = Address->getType()->getPointerAddressSpace();
5394
5395 // Do a match against the root of this address, ignoring profitability. This
5396 // will tell us if the addressing mode for the memory operation will
5397 // *actually* cover the shared instruction.
5398 ExtAddrMode Result;
5399 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5400 0);
5401 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5402 TPT.getRestorationPoint();
5403 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5404 AddressAccessTy, AS, UserI, Result,
5405 InsertedInsts, PromotedInsts, TPT,
5406 LargeOffsetGEP, OptSize, PSI, BFI);
5407 Matcher.IgnoreProfitability = true;
5408 bool Success = Matcher.matchAddr(Address, 0);
5409 (void)Success;
5410 assert(Success && "Couldn't select *anything*?");
5411
5412 // The match was to check the profitability, the changes made are not
5413 // part of the original matcher. Therefore, they should be dropped
5414 // otherwise the original matcher will not present the right state.
5415 TPT.rollback(LastKnownGood);
5416
5417 // If the match didn't cover I, then it won't be shared by it.
5418 if (!is_contained(MatchedAddrModeInsts, I))
5419 return false;
5420
5421 MatchedAddrModeInsts.clear();
5422 }
5423
5424 return true;
5425}
5426
5427/// Return true if the specified values are defined in a
5428/// different basic block than BB.
5429static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5430 if (Instruction *I = dyn_cast<Instruction>(V))
5431 return I->getParent() != BB;
5432 return false;
5433}
5434
5435 /// Sink addressing mode computation immediately before MemoryInst if doing so
5436/// can be done without increasing register pressure. The need for the
5437/// register pressure constraint means this can end up being an all or nothing
5438/// decision for all uses of the same addressing computation.
5439///
5440/// Load and Store Instructions often have addressing modes that can do
5441/// significant amounts of computation. As such, instruction selection will try
5442/// to get the load or store to do as much computation as possible for the
5443/// program. The problem is that isel can only see within a single block. As
5444/// such, we sink as much legal addressing mode work into the block as possible.
5445///
5446/// This method is used to optimize both load/store and inline asms with memory
5447/// operands. It's also used to sink addressing computations feeding into cold
5448/// call sites into their (cold) basic block.
5449///
5450/// The motivation for handling sinking into cold blocks is that doing so can
5451/// both enable other address mode sinking (by satisfying the register pressure
5452/// constraint above), and reduce register pressure globally (by removing the
5453/// addressing mode computation from the fast path entirely.).
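// Illustrative sketch (IR names invented): given an address computed in a
// different block,
//   bb1:  %addr = getelementptr i32, ptr %base, i64 %i
//   bb2:  %v = load i32, ptr %addr
// this routine rebuilds the matched addressing expression as a "sunkaddr"
// value immediately before the load in bb2, so SelectionDAG can fold it into
// the memory operation even though it only sees one block at a time.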
5454bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5455 Type *AccessTy, unsigned AddrSpace) {
5456 Value *Repl = Addr;
5457
5458 // Try to collapse single-value PHI nodes. This is necessary to undo
5459 // unprofitable PRE transformations.
5460 SmallVector<Value *, 8> worklist;
5461 SmallPtrSet<Value *, 16> Visited;
5462 worklist.push_back(Addr);
5463
5464 // Use a worklist to iteratively look through PHI and select nodes, and
5465 // ensure that the addressing mode obtained from the non-PHI/select roots of
5466 // the graph are compatible.
5467 bool PhiOrSelectSeen = false;
5468 SmallVector<Instruction *, 16> AddrModeInsts;
5469 const SimplifyQuery SQ(*DL, TLInfo);
5470 AddressingModeCombiner AddrModes(SQ, Addr);
5471 TypePromotionTransaction TPT(RemovedInsts);
5472 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5473 TPT.getRestorationPoint();
5474 while (!worklist.empty()) {
5475 Value *V = worklist.pop_back_val();
5476
5477 // We allow traversing cyclic Phi nodes.
5478 // In case of success after this loop we ensure that traversing through
5479 // Phi nodes ends up with all cases to compute address of the form
5480 // BaseGV + Base + Scale * Index + Offset
5481 // where Scale and Offset are constants and BaseGV, Base and Index
5482 // are exactly the same Values in all cases.
5483 // It means that BaseGV, Scale and Offset dominate our memory instruction
5484 // and have the same value as they had in address computation represented
5485 // as Phi. So we can safely sink address computation to memory instruction.
5486 if (!Visited.insert(V).second)
5487 continue;
5488
5489 // For a PHI node, push all of its incoming values.
5490 if (PHINode *P = dyn_cast<PHINode>(V)) {
5491 append_range(worklist, P->incoming_values());
5492 PhiOrSelectSeen = true;
5493 continue;
5494 }
5495 // Similar for select.
5496 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5497 worklist.push_back(SI->getFalseValue());
5498 worklist.push_back(SI->getTrueValue());
5499 PhiOrSelectSeen = true;
5500 continue;
5501 }
5502
5503 // For non-PHIs, determine the addressing mode being computed. Note that
5504 // the result may differ depending on what other uses our candidate
5505 // addressing instructions might have.
5506 AddrModeInsts.clear();
5507 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5508 0);
5509 // Defer the query (and possible computation of) the dom tree to point of
5510 // actual use. It's expected that most address matches don't actually need
5511 // the domtree.
5512 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5513 Function *F = MemoryInst->getParent()->getParent();
5514 return this->getDT(*F);
5515 };
5516 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5517 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5518 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5519 BFI.get());
5520
5521 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5522 if (GEP && !NewGEPBases.count(GEP)) {
5523 // If splitting the underlying data structure can reduce the offset of a
5524 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5525 // previously split data structures.
5526 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5527 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5528 }
5529
5530 NewAddrMode.OriginalValue = V;
5531 if (!AddrModes.addNewAddrMode(NewAddrMode))
5532 break;
5533 }
5534
5535 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5536 // or we have multiple but either couldn't combine them or combining them
5537 // wouldn't do anything useful, bail out now.
5538 if (!AddrModes.combineAddrModes()) {
5539 TPT.rollback(LastKnownGood);
5540 return false;
5541 }
5542 bool Modified = TPT.commit();
5543
5544 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5545 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5546
5547 // If all the instructions matched are already in this BB, don't do anything.
5548 // If we saw a Phi node then it is definitely not local, and if we saw a
5549 // select then we want to push the address calculation past it even if it's
5550 // already in this BB.
5551 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5552 return IsNonLocalValue(V, MemoryInst->getParent());
5553 })) {
5554 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5555 << "\n");
5556 return Modified;
5557 }
5558
5559 // Insert this computation right after this user. Since our caller is
5560 // scanning from the top of the BB to the bottom, reuses of the expr are
5561 // guaranteed to happen later.
5562 IRBuilder<> Builder(MemoryInst);
5563
5564 // Now that we've determined the addressing expression we want to use and know
5565 // that we have to sink it into this block, check to see if we have already
5566 // done this for some other load/store instr in this block. If so, reuse
5567 // the computation. Before attempting reuse, check if the address is valid
5568 // as it may have been erased.
5569
5570 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5571
5572 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5573 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5574 if (SunkAddr) {
5575 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5576 << " for " << *MemoryInst << "\n");
5577 if (SunkAddr->getType() != Addr->getType()) {
5578 if (SunkAddr->getType()->getPointerAddressSpace() !=
5579 Addr->getType()->getPointerAddressSpace() &&
5580 !DL->isNonIntegralPointerType(Addr->getType())) {
5581 // There are two reasons the address spaces might not match: a no-op
5582 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5583 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5584 // TODO: allow bitcast between different address space pointers with the
5585 // same size.
5586 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5587 SunkAddr =
5588 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5589 } else
5590 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5591 }
5592 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5593 SubtargetInfo->addrSinkUsingGEPs())) {
5594 // By default, we use the GEP-based method when AA is used later. This
5595 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5596 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5597 << " for " << *MemoryInst << "\n");
5598 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5599
5600 // First, find the pointer.
5601 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5602 ResultPtr = AddrMode.BaseReg;
5603 AddrMode.BaseReg = nullptr;
5604 }
5605
5606 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5607 // We can't add more than one pointer together, nor can we scale a
5608 // pointer (both of which seem meaningless).
5609 if (ResultPtr || AddrMode.Scale != 1)
5610 return Modified;
5611
5612 ResultPtr = AddrMode.ScaledReg;
5613 AddrMode.Scale = 0;
5614 }
5615
5616 // It is only safe to sign extend the BaseReg if we know that the math
5617 // required to create it did not overflow before we extend it. Since
5618 // the original IR value was tossed in favor of a constant back when
5619 // the AddrMode was created we need to bail out gracefully if widths
5620 // do not match instead of extending it.
5621 //
5622 // (See below for code to add the scale.)
5623 if (AddrMode.Scale) {
5624 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
5625 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
5626 cast<IntegerType>(ScaledRegTy)->getBitWidth())
5627 return Modified;
5628 }
5629
5630 GlobalValue *BaseGV = AddrMode.BaseGV;
5631 if (BaseGV != nullptr) {
5632 if (ResultPtr)
5633 return Modified;
5634
5635 if (BaseGV->isThreadLocal()) {
5636 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
5637 } else {
5638 ResultPtr = BaseGV;
5639 }
5640 }
5641
5642 // If the real base value actually came from an inttoptr, then the matcher
5643 // will look through it and provide only the integer value. In that case,
5644 // use it here.
5645 if (!DL->isNonIntegralPointerType(Addr->getType())) {
5646 if (!ResultPtr && AddrMode.BaseReg) {
5647 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
5648 "sunkaddr");
5649 AddrMode.BaseReg = nullptr;
5650 } else if (!ResultPtr && AddrMode.Scale == 1) {
5651 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
5652 "sunkaddr");
5653 AddrMode.Scale = 0;
5654 }
5655 }
5656
5657 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
5658 !AddrMode.BaseOffs) {
5659 SunkAddr = Constant::getNullValue(Addr->getType());
5660 } else if (!ResultPtr) {
5661 return Modified;
5662 } else {
5663 Type *I8PtrTy =
5664 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
5665
5666 // Start with the base register. Do this first so that subsequent address
5667 // matching finds it last, which will prevent it from trying to match it
5668 // as the scaled value in case it happens to be a mul. That would be
5669 // problematic if we've sunk a different mul for the scale, because then
5670 // we'd end up sinking both muls.
5671 if (AddrMode.BaseReg) {
5672 Value *V = AddrMode.BaseReg;
5673 if (V->getType() != IntPtrTy)
5674 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5675
5676 ResultIndex = V;
5677 }
5678
5679 // Add the scale value.
5680 if (AddrMode.Scale) {
5681 Value *V = AddrMode.ScaledReg;
5682 if (V->getType() == IntPtrTy) {
5683 // done.
5684 } else {
5685 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
5686 cast<IntegerType>(V->getType())->getBitWidth() &&
5687 "We can't transform if ScaledReg is too narrow");
5688 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5689 }
5690
5691 if (AddrMode.Scale != 1)
5692 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5693 "sunkaddr");
5694 if (ResultIndex)
5695 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
5696 else
5697 ResultIndex = V;
5698 }
5699
5700 // Add in the Base Offset if present.
5701 if (AddrMode.BaseOffs) {
5702 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5703 if (ResultIndex) {
5704 // We need to add this separately from the scale above to help with
5705 // SDAG consecutive load/store merging.
5706 if (ResultPtr->getType() != I8PtrTy)
5707 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5708 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
5709 AddrMode.InBounds);
5710 }
5711
5712 ResultIndex = V;
5713 }
5714
5715 if (!ResultIndex) {
5716 SunkAddr = ResultPtr;
5717 } else {
5718 if (ResultPtr->getType() != I8PtrTy)
5719 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5720 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
5721 AddrMode.InBounds);
5722 }
5723
5724 if (SunkAddr->getType() != Addr->getType()) {
5725 if (SunkAddr->getType()->getPointerAddressSpace() !=
5726 Addr->getType()->getPointerAddressSpace() &&
5727 !DL->isNonIntegralPointerType(Addr->getType())) {
5728 // There are two reasons the address spaces might not match: a no-op
5729 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5730 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5731 // TODO: allow bitcast between different address space pointers with
5732 // the same size.
5733 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5734 SunkAddr =
5735 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5736 } else
5737 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5738 }
5739 }
5740 } else {
5741 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
5742 // non-integral pointers, so in that case bail out now.
5743 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
5744 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
5745 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
5746 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
5747 if (DL->isNonIntegralPointerType(Addr->getType()) ||
5748 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
5749 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
5750 (AddrMode.BaseGV &&
5751 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
5752 return Modified;
5753
5754 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5755 << " for " << *MemoryInst << "\n");
5756 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5757 Value *Result = nullptr;
5758
5759 // Start with the base register. Do this first so that subsequent address
5760 // matching finds it last, which will prevent it from trying to match it
5761 // as the scaled value in case it happens to be a mul. That would be
5762 // problematic if we've sunk a different mul for the scale, because then
5763 // we'd end up sinking both muls.
5764 if (AddrMode.BaseReg) {
5765 Value *V = AddrMode.BaseReg;
5766 if (V->getType()->isPointerTy())
5767 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5768 if (V->getType() != IntPtrTy)
5769 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5770 Result = V;
5771 }
5772
5773 // Add the scale value.
5774 if (AddrMode.Scale) {
5775 Value *V = AddrMode.ScaledReg;
5776 if (V->getType() == IntPtrTy) {
5777 // done.
5778 } else if (V->getType()->isPointerTy()) {
5779 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5780 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
5781 cast<IntegerType>(V->getType())->getBitWidth()) {
5782 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5783 } else {
5784 // It is only safe to sign extend the BaseReg if we know that the math
5785 // required to create it did not overflow before we extend it. Since
5786 // the original IR value was tossed in favor of a constant back when
5787 // the AddrMode was created we need to bail out gracefully if widths
5788 // do not match instead of extending it.
5789 Instruction *I = dyn_cast_or_null<Instruction>(Result);
5790 if (I && (Result != AddrMode.BaseReg))
5791 I->eraseFromParent();
5792 return Modified;
5793 }
5794 if (AddrMode.Scale != 1)
5795 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5796 "sunkaddr");
5797 if (Result)
5798 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5799 else
5800 Result = V;
5801 }
5802
5803 // Add in the BaseGV if present.
5804 GlobalValue *BaseGV = AddrMode.BaseGV;
5805 if (BaseGV != nullptr) {
5806 Value *BaseGVPtr;
5807 if (BaseGV->isThreadLocal()) {
5808 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
5809 } else {
5810 BaseGVPtr = BaseGV;
5811 }
5812 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
5813 if (Result)
5814 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5815 else
5816 Result = V;
5817 }
5818
5819 // Add in the Base Offset if present.
5820 if (AddrMode.BaseOffs) {
5821 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5822 if (Result)
5823 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5824 else
5825 Result = V;
5826 }
5827
5828 if (!Result)
5829 SunkAddr = Constant::getNullValue(Addr->getType());
5830 else
5831 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5832 }
5833
5834 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5835 // Store the newly computed address into the cache. In the case we reused a
5836 // value, this should be idempotent.
5837 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5838
5839 // If we have no uses, recursively delete the value and all dead instructions
5840 // using it.
5841 if (Repl->use_empty()) {
5842 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
5843 RecursivelyDeleteTriviallyDeadInstructions(
5844 Repl, TLInfo, nullptr,
5845 [&](Value *V) { removeAllAssertingVHReferences(V); });
5846 });
5847 }
5848 ++NumMemoryInsts;
5849 return true;
5850}
5851
5852/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
5853/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
5854/// only handle a 2 operand GEP in the same basic block or a splat constant
5855/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
5856/// index.
5857///
5858/// If the existing GEP has a vector base pointer that is splat, we can look
5859/// through the splat to find the scalar pointer. If we can't find a scalar
5860/// pointer there's nothing we can do.
5861///
5862/// If we have a GEP with more than 2 indices where the middle indices are all
5863/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
5864///
5865/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
5866/// followed by a GEP with an all zeroes vector index. This will enable
5867/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
5868/// zero index.
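// Illustrative sketch (IR names invented): a gather address such as
//   %ptrs = getelementptr i32, <4 x ptr> %splat_of_base, <4 x i64> %idx
// can be rewritten, roughly, as
//   %ptrs = getelementptr i32, ptr %base, <4 x i64> %idx
// giving SelectionDAGBuilder a scalar uniform base and a single vector index.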
5869bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
5870 Value *Ptr) {
5871 Value *NewAddr;
5872
5873 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
5874 // Don't optimize GEPs that don't have indices.
5875 if (!GEP->hasIndices())
5876 return false;
5877
5878 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
5879 // FIXME: We should support this by sinking the GEP.
5880 if (MemoryInst->getParent() != GEP->getParent())
5881 return false;
5882
5883 SmallVector<Value *, 2> Ops(GEP->operands());
5884
5885 bool RewriteGEP = false;
5886
5887 if (Ops[0]->getType()->isVectorTy()) {
5888 Ops[0] = getSplatValue(Ops[0]);
5889 if (!Ops[0])
5890 return false;
5891 RewriteGEP = true;
5892 }
5893
5894 unsigned FinalIndex = Ops.size() - 1;
5895
5896 // Ensure all but the last index is 0.
5897 // FIXME: This isn't strictly required. All that's required is that they are
5898 // all scalars or splats.
5899 for (unsigned i = 1; i < FinalIndex; ++i) {
5900 auto *C = dyn_cast<Constant>(Ops[i]);
5901 if (!C)
5902 return false;
5903 if (isa<VectorType>(C->getType()))
5904 C = C->getSplatValue();
5905 auto *CI = dyn_cast_or_null<ConstantInt>(C);
5906 if (!CI || !CI->isZero())
5907 return false;
5908 // Scalarize the index if needed.
5909 Ops[i] = CI;
5910 }
5911
5912 // Try to scalarize the final index.
5913 if (Ops[FinalIndex]->getType()->isVectorTy()) {
5914 if (Value *V = getSplatValue(Ops[FinalIndex])) {
5915 auto *C = dyn_cast<ConstantInt>(V);
5916 // Don't scalarize all zeros vector.
5917 if (!C || !C->isZero()) {
5918 Ops[FinalIndex] = V;
5919 RewriteGEP = true;
5920 }
5921 }
5922 }
5923
5924 // If we made any changes or we have extra operands, we need to generate
5925 // new instructions.
5926 if (!RewriteGEP && Ops.size() == 2)
5927 return false;
5928
5929 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
5930
5931 IRBuilder<> Builder(MemoryInst);
5932
5933 Type *SourceTy = GEP->getSourceElementType();
5934 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
5935
5936 // If the final index isn't a vector, emit a scalar GEP containing all ops
5937 // and a vector GEP with all zeroes final index.
5938 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
5939 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
5940 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
5941 auto *SecondTy = GetElementPtrInst::getIndexedType(
5942 SourceTy, ArrayRef(Ops).drop_front());
5943 NewAddr =
5944 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
5945 } else {
5946 Value *Base = Ops[0];
5947 Value *Index = Ops[FinalIndex];
5948
5949 // Create a scalar GEP if there are more than 2 operands.
5950 if (Ops.size() != 2) {
5951 // Replace the last index with 0.
5952 Ops[FinalIndex] =
5953 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
5954 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
5955 SourceTy = GetElementPtrInst::getIndexedType(
5956 SourceTy, ArrayRef(Ops).drop_front());
5957 }
5958
5959 // Now create the GEP with scalar pointer and vector index.
5960 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
5961 }
5962 } else if (!isa<Constant>(Ptr)) {
5963 // Not a GEP, maybe it's a splat and we can create a GEP to enable
5964 // SelectionDAGBuilder to use it as a uniform base.
5965 Value *V = getSplatValue(Ptr);
5966 if (!V)
5967 return false;
5968
5969 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
5970
5971 IRBuilder<> Builder(MemoryInst);
5972
5973 // Emit a vector GEP with a scalar pointer and all 0s vector index.
5974 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
5975 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
5976 Type *ScalarTy;
5977 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
5978 Intrinsic::masked_gather) {
5979 ScalarTy = MemoryInst->getType()->getScalarType();
5980 } else {
5981 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
5982 Intrinsic::masked_scatter);
5983 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
5984 }
5985 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
5986 } else {
5987 // Constant, SelectionDAGBuilder knows to check if it's a splat.
5988 return false;
5989 }
5990
5991 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
5992
5993 // If we have no uses, recursively delete the value and all dead instructions
5994 // using it.
5995 if (Ptr->use_empty())
5996 RecursivelyDeleteTriviallyDeadInstructions(
5997 Ptr, TLInfo, nullptr,
5998 [&](Value *V) { removeAllAssertingVHReferences(V); });
5999
6000 return true;
6001}
6002
6003/// If there are any memory operands, use OptimizeMemoryInst to sink their
6004/// address computing into the block when possible / profitable.
6005bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6006 bool MadeChange = false;
6007
6008 const TargetRegisterInfo *TRI =
6009 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6010 TargetLowering::AsmOperandInfoVector TargetConstraints =
6011 TLI->ParseConstraints(*DL, TRI, *CS);
6012 unsigned ArgNo = 0;
6013 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6014 // Compute the constraint code and ConstraintType to use.
6015 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6016
6017 // TODO: Also handle C_Address?
6018 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6019 OpInfo.isIndirect) {
6020 Value *OpVal = CS->getArgOperand(ArgNo++);
6021 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6022 } else if (OpInfo.Type == InlineAsm::isInput)
6023 ArgNo++;
6024 }
6025
6026 return MadeChange;
6027}
6028
6029/// Check if all the uses of \p Val are equivalent (or free) zero or
6030/// sign extensions.
6031static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6032 assert(!Val->use_empty() && "Input must have at least one use");
6033 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6034 bool IsSExt = isa<SExtInst>(FirstUser);
6035 Type *ExtTy = FirstUser->getType();
6036 for (const User *U : Val->users()) {
6037 const Instruction *UI = cast<Instruction>(U);
6038 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6039 return false;
6040 Type *CurTy = UI->getType();
6041 // Same input and output types: Same instruction after CSE.
6042 if (CurTy == ExtTy)
6043 continue;
6044
6045 // If IsSExt is true, we are in this situation:
6046 // a = Val
6047 // b = sext ty1 a to ty2
6048 // c = sext ty1 a to ty3
6049 // Assuming ty2 is shorter than ty3, this could be turned into:
6050 // a = Val
6051 // b = sext ty1 a to ty2
6052 // c = sext ty2 b to ty3
6053 // However, the last sext is not free.
6054 if (IsSExt)
6055 return false;
6056
6057 // This is a ZExt, maybe this is free to extend from one type to another.
6058 // In that case, we would not account for a different use.
6059 Type *NarrowTy;
6060 Type *LargeTy;
6061 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6062 CurTy->getScalarType()->getIntegerBitWidth()) {
6063 NarrowTy = CurTy;
6064 LargeTy = ExtTy;
6065 } else {
6066 NarrowTy = ExtTy;
6067 LargeTy = CurTy;
6068 }
6069
6070 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6071 return false;
6072 }
6073 // All uses are the same or can be derived from one another for free.
6074 return true;
6075}
6076
6077/// Try to speculatively promote extensions in \p Exts and continue
6078/// promoting through newly promoted operands recursively as far as doing so is
6079/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6080/// When some promotion happened, \p TPT contains the proper state to revert
6081/// them.
6082///
6083/// \return true if some promotion happened, false otherwise.
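// Illustrative sketch (IR names invented): starting from
//   %l = load i16, ptr %p
//   %a = add i16 %l, %c
//   %e = sext i16 %a to i32
// promoting the add gives, roughly,
//   %le = sext i16 %l to i32
//   %ce = sext i16 %c to i32
//   %a  = add i32 %le, %ce
// after which %le sits directly on the load and may later be merged into an
// extending load, provided the created extensions are judged cheap enough.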
6084bool CodeGenPrepare::tryToPromoteExts(
6085 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6086 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6087 unsigned CreatedInstsCost) {
6088 bool Promoted = false;
6089
6090 // Iterate over all the extensions to try to promote them.
6091 for (auto *I : Exts) {
6092 // Early check if we directly have ext(load).
6093 if (isa<LoadInst>(I->getOperand(0))) {
6094 ProfitablyMovedExts.push_back(I);
6095 continue;
6096 }
6097
6098 // Check whether or not we want to do any promotion. The reason we have
6099 // this check inside the for loop is to catch the case where an extension
6100 // is directly fed by a load, because in such a case the extension can be
6101 // moved up without any promotion on its operands.
6102 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6103 return false;
6104
6105 // Get the action to perform the promotion.
6106 TypePromotionHelper::Action TPH =
6107 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6108 // Check if we can promote.
6109 if (!TPH) {
6110 // Save the current extension as we cannot move up through its operand.
6111 ProfitablyMovedExts.push_back(I);
6112 continue;
6113 }
6114
6115 // Save the current state.
6116 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6117 TPT.getRestorationPoint();
6118 SmallVector<Instruction *, 4> NewExts;
6119 unsigned NewCreatedInstsCost = 0;
6120 unsigned ExtCost = !TLI->isExtFree(I);
6121 // Promote.
6122 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6123 &NewExts, nullptr, *TLI);
6124 assert(PromotedVal &&
6125 "TypePromotionHelper should have filtered out those cases");
6126
6127 // We would be able to merge only one extension in a load.
6128 // Therefore, if we have more than 1 new extension we heuristically
6129 // cut this search path, because it means we degrade the code quality.
6130 // With exactly 2, the transformation is neutral, because we will merge
6131 // one extension but leave one. However, we optimistically keep going,
6132 // because the new extension may be removed too. Also avoid replacing a
6133 // single free extension with multiple extensions, as this increases the
6134 // number of IR instructions while not providing any savings.
6135 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6136 // FIXME: It would be possible to propagate a negative value instead of
6137 // conservatively ceiling it to 0.
6138 TotalCreatedInstsCost =
6139 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6140 if (!StressExtLdPromotion &&
6141 (TotalCreatedInstsCost > 1 ||
6142 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6143 (ExtCost == 0 && NewExts.size() > 1))) {
6144 // This promotion is not profitable, rollback to the previous state, and
6145 // save the current extension in ProfitablyMovedExts as the latest
6146 // speculative promotion turned out to be unprofitable.
6147 TPT.rollback(LastKnownGood);
6148 ProfitablyMovedExts.push_back(I);
6149 continue;
6150 }
6151 // Continue promoting NewExts as far as doing so is profitable.
6152 SmallVector<Instruction *, 2> NewlyMovedExts;
6153 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6154 bool NewPromoted = false;
6155 for (auto *ExtInst : NewlyMovedExts) {
6156 Instruction *MovedExt = cast<Instruction>(ExtInst);
6157 Value *ExtOperand = MovedExt->getOperand(0);
6158 // If we have reached to a load, we need this extra profitability check
6159 // as it could potentially be merged into an ext(load).
6160 if (isa<LoadInst>(ExtOperand) &&
6161 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6162 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6163 continue;
6164
6165 ProfitablyMovedExts.push_back(MovedExt);
6166 NewPromoted = true;
6167 }
6168
6169 // If none of speculative promotions for NewExts is profitable, rollback
6170 // and save the current extension (I) as the last profitable extension.
6171 if (!NewPromoted) {
6172 TPT.rollback(LastKnownGood);
6173 ProfitablyMovedExts.push_back(I);
6174 continue;
6175 }
6176 // The promotion is profitable.
6177 Promoted = true;
6178 }
6179 return Promoted;
6180}
6181
6182/// Merging redundant sexts when one is dominating the other.
6183bool CodeGenPrepare::mergeSExts(Function &F) {
6184 bool Changed = false;
6185 for (auto &Entry : ValToSExtendedUses) {
6186 SExts &Insts = Entry.second;
6187 SExts CurPts;
6188 for (Instruction *Inst : Insts) {
6189 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6190 Inst->getOperand(0) != Entry.first)
6191 continue;
6192 bool inserted = false;
6193 for (auto &Pt : CurPts) {
6194 if (getDT(F).dominates(Inst, Pt)) {
6195 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6196 RemovedInsts.insert(Pt);
6197 Pt->removeFromParent();
6198 Pt = Inst;
6199 inserted = true;
6200 Changed = true;
6201 break;
6202 }
6203 if (!getDT(F).dominates(Pt, Inst))
6204 // Give up if we need to merge in a common dominator as the
6205 // experiments show it is not profitable.
6206 continue;
6207 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6208 RemovedInsts.insert(Inst);
6209 Inst->removeFromParent();
6210 inserted = true;
6211 Changed = true;
6212 break;
6213 }
6214 if (!inserted)
6215 CurPts.push_back(Inst);
6216 }
6217 }
6218 return Changed;
6219}
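// A minimal sketch of what this merges (same operand, one sext dominating the
// other):
//   bb0:
//     %s1 = sext i32 %v to i64
//     ...
//   bb1:                              ; dominated by bb0
//     %s2 = sext i32 %v to i64
// All uses of %s2 are rewritten to use %s1 and %s2 is removed.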
6220
6221// Split large data structures so that the GEPs accessing them can have
6222// smaller offsets, allowing them to be sunk to the same blocks as their users.
6223// For example, a large struct starting from %base is split into two parts
6224// where the second part starts from %new_base.
6225//
6226// Before:
6227// BB0:
6228// %base =
6229//
6230// BB1:
6231// %gep0 = gep %base, off0
6232// %gep1 = gep %base, off1
6233// %gep2 = gep %base, off2
6234//
6235// BB2:
6236// %load1 = load %gep0
6237// %load2 = load %gep1
6238// %load3 = load %gep2
6239//
6240// After:
6241// BB0:
6242// %base =
6243// %new_base = gep %base, off0
6244//
6245// BB1:
6246// %new_gep0 = %new_base
6247// %new_gep1 = gep %new_base, off1 - off0
6248// %new_gep2 = gep %new_base, off2 - off0
6249//
6250// BB2:
6251// %load1 = load i32, i32* %new_gep0
6252// %load2 = load i32, i32* %new_gep1
6253// %load3 = load i32, i32* %new_gep2
6254//
6255// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6256// their offsets are smaller enough to fit into the addressing mode.
6257bool CodeGenPrepare::splitLargeGEPOffsets() {
6258 bool Changed = false;
6259 for (auto &Entry : LargeOffsetGEPMap) {
6260 Value *OldBase = Entry.first;
6261 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6262 &LargeOffsetGEPs = Entry.second;
6263 auto compareGEPOffset =
6264 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6265 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6266 if (LHS.first == RHS.first)
6267 return false;
6268 if (LHS.second != RHS.second)
6269 return LHS.second < RHS.second;
6270 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6271 };
6272 // Sorting all the GEPs of the same data structures based on the offsets.
6273 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6274 LargeOffsetGEPs.erase(
6275 std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
6276 LargeOffsetGEPs.end());
6277 // Skip if all the GEPs have the same offsets.
6278 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6279 continue;
6280 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6281 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6282 Value *NewBaseGEP = nullptr;
6283
6284 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6285 GetElementPtrInst *GEP) {
6286 LLVMContext &Ctx = GEP->getContext();
6287 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6288 Type *I8PtrTy =
6289 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6290
6291 BasicBlock::iterator NewBaseInsertPt;
6292 BasicBlock *NewBaseInsertBB;
6293 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6294 // If the base of the struct is an instruction, the new base will be
6295 // inserted close to it.
6296 NewBaseInsertBB = BaseI->getParent();
6297 if (isa<PHINode>(BaseI))
6298 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6299 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6300 NewBaseInsertBB =
6301 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6302 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6303 } else
6304 NewBaseInsertPt = std::next(BaseI->getIterator());
6305 } else {
6306 // If the current base is an argument or global value, the new base
6307 // will be inserted to the entry block.
6308 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6309 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6310 }
6311 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6312 // Create a new base.
6313 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6314 NewBaseGEP = OldBase;
6315 if (NewBaseGEP->getType() != I8PtrTy)
6316 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6317 NewBaseGEP =
6318 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6319 NewGEPBases.insert(NewBaseGEP);
6320 return;
6321 };
6322
6323 // Check whether all the offsets can be encoded with the preferred common base.
6324 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6325 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6326 BaseOffset = PreferBase;
6327 // Create a new base if the offset of the BaseGEP can be encoded with one
6328 // instruction.
6329 createNewBase(BaseOffset, OldBase, BaseGEP);
6330 }
6331
6332 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6333 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6334 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6335 int64_t Offset = LargeOffsetGEP->second;
6336 if (Offset != BaseOffset) {
6337 TargetLowering::AddrMode AddrMode;
6338 AddrMode.HasBaseReg = true;
6339 AddrMode.BaseOffs = Offset - BaseOffset;
6340 // The result type of the GEP might not be the type of the memory
6341 // access.
6342 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6343 GEP->getResultElementType(),
6344 GEP->getAddressSpace())) {
6345 // We need to create a new base if the offset to the current base is
6346 // too large to fit into the addressing mode. So, a very large struct
6347 // may be split into several parts.
6348 BaseGEP = GEP;
6349 BaseOffset = Offset;
6350 NewBaseGEP = nullptr;
6351 }
6352 }
6353
6354 // Generate a new GEP to replace the current one.
6355 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6356
6357 if (!NewBaseGEP) {
6358 // Create a new base if we don't have one yet. Find the insertion
6359 // pointer for the new base first.
6360 createNewBase(BaseOffset, OldBase, GEP);
6361 }
6362
6363 IRBuilder<> Builder(GEP);
6364 Value *NewGEP = NewBaseGEP;
6365 if (Offset != BaseOffset) {
6366 // Calculate the new offset for the new GEP.
6367 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6368 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6369 }
6370 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6371 LargeOffsetGEPID.erase(GEP);
6372 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6373 GEP->eraseFromParent();
6374 Changed = true;
6375 }
6376 }
6377 return Changed;
6378}
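// Sketch of the rewrite for the example above, using the i8-based ptradd form
// emitted by createNewBase (offsets are illustrative):
//   %splitgep = getelementptr i8, ptr %base, i64 4096     ; new base in BB0
//   %new_gep1 = getelementptr i8, ptr %splitgep, i64 8    ; off1 - off0
//   %new_gep2 = getelementptr i8, ptr %splitgep, i64 16   ; off2 - off0
// The small residual offsets can then be folded into the users' addressing
// modes once the GEPs are sunk next to their memory instructions.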
6379
6380bool CodeGenPrepare::optimizePhiType(
6381 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6382 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6383 // We are looking for a collection of interconnected phi nodes that together
6384 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6385 // are of the same type. Convert the whole set of nodes to the type of the
6386 // bitcast.
6387 Type *PhiTy = I->getType();
6388 Type *ConvertTy = nullptr;
6389 if (Visited.count(I) ||
6390 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6391 return false;
6392
6393 SmallVector<Instruction *, 4> Worklist;
6394 Worklist.push_back(cast<Instruction>(I));
6395 SmallPtrSet<PHINode *, 4> PhiNodes;
6396 SmallPtrSet<ConstantData *, 4> Constants;
6397 PhiNodes.insert(I);
6398 Visited.insert(I);
6399 SmallPtrSet<Instruction *, 4> Defs;
6400 SmallPtrSet<Instruction *, 4> Uses;
6401 // This works by adding extra bitcasts between load/stores and removing
6402 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)),
6403 // we can get into a situation where we remove a bitcast in one iteration
6404 // just to add it again in the next. We need to ensure that at least one
6405 // bitcast we remove is anchored to something that will not change back.
6406 bool AnyAnchored = false;
6407
6408 while (!Worklist.empty()) {
6409 Instruction *II = Worklist.pop_back_val();
6410
6411 if (auto *Phi = dyn_cast<PHINode>(II)) {
6412 // Handle Defs, which might also be PHI's
6413 for (Value *V : Phi->incoming_values()) {
6414 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6415 if (!PhiNodes.count(OpPhi)) {
6416 if (!Visited.insert(OpPhi).second)
6417 return false;
6418 PhiNodes.insert(OpPhi);
6419 Worklist.push_back(OpPhi);
6420 }
6421 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6422 if (!OpLoad->isSimple())
6423 return false;
6424 if (Defs.insert(OpLoad).second)
6425 Worklist.push_back(OpLoad);
6426 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6427 if (Defs.insert(OpEx).second)
6428 Worklist.push_back(OpEx);
6429 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6430 if (!ConvertTy)
6431 ConvertTy = OpBC->getOperand(0)->getType();
6432 if (OpBC->getOperand(0)->getType() != ConvertTy)
6433 return false;
6434 if (Defs.insert(OpBC).second) {
6435 Worklist.push_back(OpBC);
6436 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6437 !isa<ExtractElementInst>(OpBC->getOperand(0));
6438 }
6439 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6440 Constants.insert(OpC);
6441 else
6442 return false;
6443 }
6444 }
6445
6446 // Handle uses which might also be phi's
6447 for (User *V : II->users()) {
6448 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6449 if (!PhiNodes.count(OpPhi)) {
6450 if (Visited.count(OpPhi))
6451 return false;
6452 PhiNodes.insert(OpPhi);
6453 Visited.insert(OpPhi);
6454 Worklist.push_back(OpPhi);
6455 }
6456 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6457 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6458 return false;
6459 Uses.insert(OpStore);
6460 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6461 if (!ConvertTy)
6462 ConvertTy = OpBC->getType();
6463 if (OpBC->getType() != ConvertTy)
6464 return false;
6465 Uses.insert(OpBC);
6466 AnyAnchored |=
6467 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6468 } else {
6469 return false;
6470 }
6471 }
6472 }
6473
6474 if (!ConvertTy || !AnyAnchored ||
6475 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6476 return false;
6477
6478 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6479 << *ConvertTy << "\n");
6480
6481 // Create all the new phi nodes of the new type, and bitcast any loads to the
6482 // correct type.
6483 ValueToValueMap ValMap;
6484 for (ConstantData *C : Constants)
6485 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6486 for (Instruction *D : Defs) {
6487 if (isa<BitCastInst>(D)) {
6488 ValMap[D] = D->getOperand(0);
6489 DeletedInstrs.insert(D);
6490 } else {
6491 BasicBlock::iterator insertPt = std::next(D->getIterator());
6492 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
6493 }
6494 }
6495 for (PHINode *Phi : PhiNodes)
6496 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6497 Phi->getName() + ".tc", Phi->getIterator());
6498 // Pipe together all the PhiNodes.
6499 for (PHINode *Phi : PhiNodes) {
6500 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6501 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6502 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6503 Phi->getIncomingBlock(i));
6504 Visited.insert(NewPhi);
6505 }
6506 // And finally pipe up the stores and bitcasts
6507 for (Instruction *U : Uses) {
6508 if (isa<BitCastInst>(U)) {
6509 DeletedInstrs.insert(U);
6510 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6511 } else {
6512 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
6513 U->getIterator()));
6514 }
6515 }
6516
6517 // Save the removed phis to be deleted later.
6518 for (PHINode *Phi : PhiNodes)
6519 DeletedInstrs.insert(Phi);
6520 return true;
6521}
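// A minimal sketch of the conversion (assuming the target's
// shouldConvertPhiType(float, i32) returns true; names are illustrative):
//   %ld  = load float, ptr %p
//   %phi = phi float [ %ld, %bb0 ], [ %other, %bb1 ]
//   %bc  = bitcast float %phi to i32
//   %use = add i32 %bc, 1
// becomes, with the phi network rewritten in terms of i32:
//   %ld.bc  = bitcast float %ld to i32
//   %phi.tc = phi i32 [ %ld.bc, %bb0 ], [ %other.bc, %bb1 ]
//   %use    = add i32 %phi.tc, 1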
6522
6523bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6524 if (!OptimizePhiTypes)
6525 return false;
6526
6527 bool Changed = false;
6528 SmallPtrSet<PHINode *, 4> Visited;
6529 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6530
6531 // Attempt to optimize all the phis in the functions to the correct type.
6532 for (auto &BB : F)
6533 for (auto &Phi : BB.phis())
6534 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6535
6536 // Remove any old phi's that have been converted.
6537 for (auto *I : DeletedInstrs) {
6538 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6539 I->eraseFromParent();
6540 }
6541
6542 return Changed;
6543}
6544
6545/// Return true, if an ext(load) can be formed from an extension in
6546/// \p MovedExts.
6547bool CodeGenPrepare::canFormExtLd(
6548 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6549 Instruction *&Inst, bool HasPromoted) {
6550 for (auto *MovedExtInst : MovedExts) {
6551 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6552 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6553 Inst = MovedExtInst;
6554 break;
6555 }
6556 }
6557 if (!LI)
6558 return false;
6559
6560 // If they're already in the same block, there's nothing to do.
6561 // Make the cheap checks first if we did not promote.
6562 // If we promoted, we need to check if it is indeed profitable.
6563 if (!HasPromoted && LI->getParent() == Inst->getParent())
6564 return false;
6565
6566 return TLI->isExtLoad(LI, Inst, *DL);
6567}
6568
6569/// Move a zext or sext fed by a load into the same basic block as the load,
6570/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6571/// extend into the load.
6572///
6573/// E.g.,
6574/// \code
6575/// %ld = load i32* %addr
6576/// %add = add nuw i32 %ld, 4
6577/// %zext = zext i32 %add to i64
6578/// \endcode
6579/// =>
6580/// \code
6581/// %ld = load i32* %addr
6582/// %zext = zext i32 %ld to i64
6583/// %add = add nuw i64 %zext, 4
6584/// \endcode
6585/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
6586/// allows us to match zext(load i32*) to i64.
6587///
6588/// Also, try to promote the computations used to obtain a sign extended
6589/// value used in memory accesses.
6590/// E.g.,
6591/// \code
6592/// a = add nsw i32 b, 3
6593/// d = sext i32 a to i64
6594/// e = getelementptr ..., i64 d
6595/// \endcode
6596/// =>
6597/// \code
6598/// f = sext i32 b to i64
6599/// a = add nsw i64 f, 3
6600/// e = getelementptr ..., i64 a
6601/// \endcode
6602///
6603/// \p Inst[in/out] the extension may be modified during the process if some
6604/// promotions apply.
6605bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6606 bool AllowPromotionWithoutCommonHeader = false;
6607 /// See if it is an interesting sext operation for the address type
6608 /// promotion before trying to promote it, e.g., the ones with the right
6609 /// type and used in memory accesses.
6610 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6611 *Inst, AllowPromotionWithoutCommonHeader);
6612 TypePromotionTransaction TPT(RemovedInsts);
6613 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6614 TPT.getRestorationPoint();
6615 SmallVector<Instruction *, 1> Exts;
6616 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
6617 Exts.push_back(Inst);
6618
6619 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6620
6621 // Look for a load being extended.
6622 LoadInst *LI = nullptr;
6623 Instruction *ExtFedByLoad;
6624
6625 // Try to promote a chain of computation if doing so allows forming an
6626 // extended load.
6627 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6628 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
6629 TPT.commit();
6630 // Move the extend into the same block as the load.
6631 ExtFedByLoad->moveAfter(LI);
6632 ++NumExtsMoved;
6633 Inst = ExtFedByLoad;
6634 return true;
6635 }
6636
6637 // Continue promoting SExts if the target reported them as considerable for address type promotion.
6638 if (ATPConsiderable &&
6639 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6640 HasPromoted, TPT, SpeculativelyMovedExts))
6641 return true;
6642
6643 TPT.rollback(LastKnownGood);
6644 return false;
6645}
6646
6647// Perform address type promotion if doing so is profitable.
6648// If AllowPromotionWithoutCommonHeader == false, we should find other sext
6649// instructions that sign extended the same initial value. However, if
6650// AllowPromotionWithoutCommonHeader == true, we expect promoting the
6651// extension to be profitable on its own.
6652bool CodeGenPrepare::performAddressTypePromotion(
6653 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
6654 bool HasPromoted, TypePromotionTransaction &TPT,
6655 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
6656 bool Promoted = false;
6657 SmallPtrSet<Instruction *, 1> UnhandledExts;
6658 bool AllSeenFirst = true;
6659 for (auto *I : SpeculativelyMovedExts) {
6660 Value *HeadOfChain = I->getOperand(0);
6661 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
6662 SeenChainsForSExt.find(HeadOfChain);
6663 // If there is an unhandled SExt which has the same header, try to promote
6664 // it as well.
6665 if (AlreadySeen != SeenChainsForSExt.end()) {
6666 if (AlreadySeen->second != nullptr)
6667 UnhandledExts.insert(AlreadySeen->second);
6668 AllSeenFirst = false;
6669 }
6670 }
6671
6672 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6673 SpeculativelyMovedExts.size() == 1)) {
6674 TPT.commit();
6675 if (HasPromoted)
6676 Promoted = true;
6677 for (auto *I : SpeculativelyMovedExts) {
6678 Value *HeadOfChain = I->getOperand(0);
6679 SeenChainsForSExt[HeadOfChain] = nullptr;
6680 ValToSExtendedUses[HeadOfChain].push_back(I);
6681 }
6682 // Update Inst as promotion happened.
6683 Inst = SpeculativelyMovedExts.pop_back_val();
6684 } else {
6685 // This is the first chain visited from the header, keep the current chain
6686 // as unhandled. Defer promoting it until we encounter another SExt
6687 // chain derived from the same header.
6688 for (auto *I : SpeculativelyMovedExts) {
6689 Value *HeadOfChain = I->getOperand(0);
6690 SeenChainsForSExt[HeadOfChain] = Inst;
6691 }
6692 return false;
6693 }
6694
6695 if (!AllSeenFirst && !UnhandledExts.empty())
6696 for (auto *VisitedSExt : UnhandledExts) {
6697 if (RemovedInsts.count(VisitedSExt))
6698 continue;
6699 TypePromotionTransaction TPT(RemovedInsts);
6700 SmallVector<Instruction *, 1> Exts;
6701 SmallVector<Instruction *, 2> Chains;
6702 Exts.push_back(VisitedSExt);
6703 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6704 TPT.commit();
6705 if (HasPromoted)
6706 Promoted = true;
6707 for (auto *I : Chains) {
6708 Value *HeadOfChain = I->getOperand(0);
6709 // Mark this as handled.
6710 SeenChainsForSExt[HeadOfChain] = nullptr;
6711 ValToSExtendedUses[HeadOfChain].push_back(I);
6712 }
6713 }
6714 return Promoted;
6715}
6716
6717bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
6718 BasicBlock *DefBB = I->getParent();
6719
6720 // If the result of a {s|z}ext and its source are both live out, rewrite all
6721 // other uses of the source with result of extension.
6722 Value *Src = I->getOperand(0);
6723 if (Src->hasOneUse())
6724 return false;
6725
6726 // Only do this xform if truncating is free.
6727 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
6728 return false;
6729
6730 // Only safe to perform the optimization if the source is also defined in
6731 // this block.
6732 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
6733 return false;
6734
6735 bool DefIsLiveOut = false;
6736 for (User *U : I->users()) {
6737 Instruction *UI = cast<Instruction>(U);
6738
6739 // Figure out which BB this ext is used in.
6740 BasicBlock *UserBB = UI->getParent();
6741 if (UserBB == DefBB)
6742 continue;
6743 DefIsLiveOut = true;
6744 break;
6745 }
6746 if (!DefIsLiveOut)
6747 return false;
6748
6749 // Make sure none of the uses are PHI nodes.
6750 for (User *U : Src->users()) {
6751 Instruction *UI = cast<Instruction>(U);
6752 BasicBlock *UserBB = UI->getParent();
6753 if (UserBB == DefBB)
6754 continue;
6755 // Be conservative. We don't want this xform to end up introducing
6756 // reloads just before load / store instructions.
6757 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
6758 return false;
6759 }
6760
6761 // InsertedTruncs - Only insert one trunc in each block.
6762 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
6763
6764 bool MadeChange = false;
6765 for (Use &U : Src->uses()) {
6766 Instruction *User = cast<Instruction>(U.getUser());
6767
6768 // Figure out which BB this ext is used in.
6769 BasicBlock *UserBB = User->getParent();
6770 if (UserBB == DefBB)
6771 continue;
6772
6773 // Both src and def are live in this block. Rewrite the use.
6774 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
6775
6776 if (!InsertedTrunc) {
6777 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
6778 assert(InsertPt != UserBB->end());
6779 InsertedTrunc = new TruncInst(I, Src->getType(), "");
6780 InsertedTrunc->insertBefore(*UserBB, InsertPt);
6781 InsertedInsts.insert(InsertedTrunc);
6782 }
6783
6784 // Replace a use of the {s|z}ext source with a use of the result.
6785 U = InsertedTrunc;
6786 ++NumExtUses;
6787 MadeChange = true;
6788 }
6789
6790 return MadeChange;
6791}
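// A minimal sketch of this rewrite (assuming trunc i64 -> i32 is free):
//   DefBB:
//     %src = add i32 %a, %b
//     %ext = zext i32 %src to i64
//   UserBB:
//     use i32 %src
// becomes:
//   UserBB:
//     %t = trunc i64 %ext to i32
//     use i32 %t
// so that only %ext, rather than both %src and %ext, is live out of DefBB.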
6792
6793// Find loads whose uses only use some of the loaded value's bits. Add an "and"
6794// just after the load if the target can fold this into one extload instruction,
6795// with the hope of eliminating some of the other later "and" instructions using
6796// the loaded value. "and"s that are made trivially redundant by the insertion
6797// of the new "and" are removed by this function, while others (e.g. those whose
6798// path from the load goes through a phi) are left for isel to potentially
6799// remove.
6800//
6801// For example:
6802//
6803// b0:
6804// x = load i32
6805// ...
6806// b1:
6807// y = and x, 0xff
6808// z = use y
6809//
6810// becomes:
6811//
6812// b0:
6813// x = load i32
6814// x' = and x, 0xff
6815// ...
6816// b1:
6817// z = use x'
6818//
6819// whereas:
6820//
6821// b0:
6822// x1 = load i32
6823// ...
6824// b1:
6825// x2 = load i32
6826// ...
6827// b2:
6828// x = phi x1, x2
6829// y = and x, 0xff
6830//
6831// becomes (after a call to optimizeLoadExt for each load):
6832//
6833// b0:
6834// x1 = load i32
6835// x1' = and x1, 0xff
6836// ...
6837// b1:
6838// x2 = load i32
6839// x2' = and x2, 0xff
6840// ...
6841// b2:
6842// x = phi x1', x2'
6843// y = and x, 0xff
6844bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
6845 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
6846 return false;
6847
6848 // Skip loads we've already transformed.
6849 if (Load->hasOneUse() &&
6850 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
6851 return false;
6852
6853 // Look at all uses of Load, looking through phis, to determine how many bits
6854 // of the loaded value are needed.
6855 SmallVector<Instruction *, 8> WorkList;
6856 SmallPtrSet<Instruction *, 16> Visited;
6857 SmallVector<Instruction *, 8> AndsToMaybeRemove;
6858 for (auto *U : Load->users())
6859 WorkList.push_back(cast<Instruction>(U));
6860
6861 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
6862 unsigned BitWidth = LoadResultVT.getSizeInBits();
6863 // If the BitWidth is 0, do not try to optimize the type
6864 if (BitWidth == 0)
6865 return false;
6866
6867 APInt DemandBits(BitWidth, 0);
6868 APInt WidestAndBits(BitWidth, 0);
6869
6870 while (!WorkList.empty()) {
6871 Instruction *I = WorkList.pop_back_val();
6872
6873 // Break use-def graph loops.
6874 if (!Visited.insert(I).second)
6875 continue;
6876
6877 // For a PHI node, push all of its users.
6878 if (auto *Phi = dyn_cast<PHINode>(I)) {
6879 for (auto *U : Phi->users())
6880 WorkList.push_back(cast<Instruction>(U));
6881 continue;
6882 }
6883
6884 switch (I->getOpcode()) {
6885 case Instruction::And: {
6886 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
6887 if (!AndC)
6888 return false;
6889 APInt AndBits = AndC->getValue();
6890 DemandBits |= AndBits;
6891 // Keep track of the widest and mask we see.
6892 if (AndBits.ugt(WidestAndBits))
6893 WidestAndBits = AndBits;
6894 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
6895 AndsToMaybeRemove.push_back(I);
6896 break;
6897 }
6898
6899 case Instruction::Shl: {
6900 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
6901 if (!ShlC)
6902 return false;
6903 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
6904 DemandBits.setLowBits(BitWidth - ShiftAmt);
6905 break;
6906 }
6907
6908 case Instruction::Trunc: {
6909 EVT TruncVT = TLI->getValueType(*DL, I->getType());
6910 unsigned TruncBitWidth = TruncVT.getSizeInBits();
6911 DemandBits.setLowBits(TruncBitWidth);
6912 break;
6913 }
6914
6915 default:
6916 return false;
6917 }
6918 }
6919
6920 uint32_t ActiveBits = DemandBits.getActiveBits();
6921 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
6922 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
6923 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
6924 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
6925 // followed by an AND.
6926 // TODO: Look into removing this restriction by fixing backends to either
6927 // return false for isLoadExtLegal for i1 or have them select this pattern to
6928 // a single instruction.
6929 //
6930 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
6931 // mask, since these are the only ands that will be removed by isel.
6932 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
6933 WidestAndBits != DemandBits)
6934 return false;
6935
6936 LLVMContext &Ctx = Load->getType()->getContext();
6937 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
6938 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
6939
6940 // Reject cases that won't be matched as extloads.
6941 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
6942 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
6943 return false;
6944
6945 IRBuilder<> Builder(Load->getNextNonDebugInstruction());
6946 auto *NewAnd = cast<Instruction>(
6947 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
6948 // Mark this instruction as "inserted by CGP", so that other
6949 // optimizations don't touch it.
6950 InsertedInsts.insert(NewAnd);
6951
6952 // Replace all uses of load with new and (except for the use of load in the
6953 // new and itself).
6954 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
6955 NewAnd->setOperand(0, Load);
6956
6957 // Remove any and instructions that are now redundant.
6958 for (auto *And : AndsToMaybeRemove)
6959 // Check that the and mask is the same as the one we decided to put on the
6960 // new and.
6961 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
6962 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
6963 if (&*CurInstIterator == And)
6964 CurInstIterator = std::next(And->getIterator());
6965 And->eraseFromParent();
6966 ++NumAndUses;
6967 }
6968
6969 ++NumAndsAdded;
6970 return true;
6971}
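// Sketch of the inserted mask for the first example above (i32 load, only the
// low 8 bits demanded):
//   %x = load i32, ptr %p
//   %x.masked = and i32 %x, 255
// All other uses of %x are rewritten to %x.masked, and isel can then match the
// load/and pair as a zero-extending i8 load where that extload is legal.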
6972
6973/// Check if V (an operand of a select instruction) is an expensive instruction
6974/// that is only used once.
6975static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
6976 auto *I = dyn_cast<Instruction>(V);
6977 // If it's safe to speculatively execute, then it should not have side
6978 // effects; therefore, it's safe to sink and possibly *not* execute.
6979 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
6980 TTI->isExpensiveToSpeculativelyExecute(I);
6981}
6982
6983/// Returns true if a SelectInst should be turned into an explicit branch.
6984static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
6985 const TargetLowering *TLI,
6986 SelectInst *SI) {
6987 // If even a predictable select is cheap, then a branch can't be cheaper.
6988 if (!TLI->isPredictableSelectExpensive())
6989 return false;
6990
6991 // FIXME: This should use the same heuristics as IfConversion to determine
6992 // whether a select is better represented as a branch.
6993
6994 // If metadata tells us that the select condition is obviously predictable,
6995 // then we want to replace the select with a branch.
6996 uint64_t TrueWeight, FalseWeight;
6997 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
6998 uint64_t Max = std::max(TrueWeight, FalseWeight);
6999 uint64_t Sum = TrueWeight + FalseWeight;
7000 if (Sum != 0) {
7001 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7002 if (Probability > TTI->getPredictableBranchThreshold())
7003 return true;
7004 }
7005 }
7006
7007 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7008
7009 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7010 // comparison condition. If the compare has more than one use, there's
7011 // probably another cmov or setcc around, so it's not worth emitting a branch.
7012 if (!Cmp || !Cmp->hasOneUse())
7013 return false;
7014
7015 // If either operand of the select is expensive and only needed on one side
7016 // of the select, we should form a branch.
7017 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7018 sinkSelectOperand(TTI, SI->getFalseValue()))
7019 return true;
7020
7021 return false;
7022}
7023
7024/// If \p isTrue is true, return the true value of \p SI, otherwise return
7025/// false value of \p SI. If the true/false value of \p SI is defined by any
7026/// select instructions in \p Selects, look through the defining select
7027/// instruction until the true/false value is not defined in \p Selects.
7028static Value *
7029getTrueOrFalseValue(SelectInst *SI, bool isTrue,
7030 const SmallPtrSet<const Instruction *, 2> &Selects) {
7031 Value *V = nullptr;
7032
7033 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7034 DefSI = dyn_cast<SelectInst>(V)) {
7035 assert(DefSI->getCondition() == SI->getCondition() &&
7036 "The condition of DefSI does not match with SI");
7037 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7038 }
7039
7040 assert(V && "Failed to get select true/false value");
7041 return V;
7042}
7043
7044bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7045 assert(Shift->isShift() && "Expected a shift");
7046
7047 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7048 // general vector shifts, and (3) the shift amount is a select-of-splatted
7049 // values, hoist the shifts before the select:
7050 // shift Op0, (select Cond, TVal, FVal) -->
7051 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7052 //
7053 // This is inverting a generic IR transform when we know that the cost of a
7054 // general vector shift is more than the cost of 2 shift-by-scalars.
7055 // We can't do this effectively in SDAG because we may not be able to
7056 // determine if the select operands are splats from within a basic block.
7057 Type *Ty = Shift->getType();
7058 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
7059 return false;
7060 Value *Cond, *TVal, *FVal;
7061 if (!match(Shift->getOperand(1),
7062 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7063 return false;
7064 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7065 return false;
7066
7067 IRBuilder<> Builder(Shift);
7068 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7069 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7070 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7071 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7072 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7073 Shift->eraseFromParent();
7074 return true;
7075}
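// A minimal sketch (assuming isVectorShiftByScalarCheap(<4 x i32>) is true):
//   %amt = select i1 %c, <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
//                        <4 x i32> <i32 3, i32 3, i32 3, i32 3>
//   %r   = shl <4 x i32> %x, %amt
// becomes:
//   %t = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
//   %f = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
//   %r = select i1 %c, <4 x i32> %t, <4 x i32> %f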
7076
7077bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7078 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7079 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7080 "Expected a funnel shift");
7081
7082 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7083 // than general vector shifts, and (3) the shift amount is select-of-splatted
7084 // values, hoist the funnel shifts before the select:
7085 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7086 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7087 //
7088 // This is inverting a generic IR transform when we know that the cost of a
7089 // general vector shift is more than the cost of 2 shift-by-scalars.
7090 // We can't do this effectively in SDAG because we may not be able to
7091 // determine if the select operands are splats from within a basic block.
7092 Type *Ty = Fsh->getType();
7093 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
7094 return false;
7095 Value *Cond, *TVal, *FVal;
7096 if (!match(Fsh->getOperand(2),
7097 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7098 return false;
7099 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7100 return false;
7101
7102 IRBuilder<> Builder(Fsh);
7103 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7104 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7105 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7106 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7107 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7108 Fsh->eraseFromParent();
7109 return true;
7110}
7111
7112/// If we have a SelectInst that will likely profit from branch prediction,
7113/// turn it into a branch.
7114bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7115 if (DisableSelectToBranch)
7116 return false;
7117
7118 // If the SelectOptimize pass is enabled, selects have already been optimized.
7119 if (!getCGPassBuilderOption().DisableSelectOptimize)
7120 return false;
7121
7122 // Find all consecutive select instructions that share the same condition.
7123 SmallVector<SelectInst *, 2> ASI;
7124 ASI.push_back(SI);
7125 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7126 It != SI->getParent()->end(); ++It) {
7127 SelectInst *I = dyn_cast<SelectInst>(&*It);
7128 if (I && SI->getCondition() == I->getCondition()) {
7129 ASI.push_back(I);
7130 } else {
7131 break;
7132 }
7133 }
7134
7135 SelectInst *LastSI = ASI.back();
7136 // Increment the current iterator to skip all the rest of select instructions
7137 // because they will be either "not lowered" or "all lowered" to branch.
7138 CurInstIterator = std::next(LastSI->getIterator());
7139 // Examine debug-info attached to the consecutive select instructions. They
7140 // won't be individually optimised by optimizeInst, so we need to perform
7141 // DbgVariableRecord maintenance here instead.
7142 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7143 fixupDbgVariableRecordsOnInst(*SI);
7144
7145 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7146
7147 // Can we convert the 'select' to CF ?
7148 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7149 return false;
7150
7152 if (SI->getType()->isVectorTy())
7153 SelectKind = TargetLowering::ScalarCondVectorVal;
7154 else
7155 SelectKind = TargetLowering::ScalarValSelect;
7156
7157 if (TLI->isSelectSupported(SelectKind) &&
7158 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
7159 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7160 return false;
7161
7162 // The DominatorTree needs to be rebuilt by any consumers after this
7163 // transformation. We simply reset here rather than setting the ModifiedDT
7164 // flag to avoid restarting the function walk in runOnFunction for each
7165 // select optimized.
7166 DT.reset();
7167
7168 // Transform a sequence like this:
7169 // start:
7170 // %cmp = cmp uge i32 %a, %b
7171 // %sel = select i1 %cmp, i32 %c, i32 %d
7172 //
7173 // Into:
7174 // start:
7175 // %cmp = cmp uge i32 %a, %b
7176 // %cmp.frozen = freeze %cmp
7177 // br i1 %cmp.frozen, label %select.true, label %select.false
7178 // select.true:
7179 // br label %select.end
7180 // select.false:
7181 // br label %select.end
7182 // select.end:
7183 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7184 //
7185 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7186 // In addition, we may sink instructions that produce %c or %d from
7187 // the entry block into the destination(s) of the new branch.
7188 // If the true or false blocks do not contain a sunken instruction, that
7189 // block and its branch may be optimized away. In that case, one side of the
7190 // first branch will point directly to select.end, and the corresponding PHI
7191 // predecessor block will be the start block.
7192
7193 // Collect values that go on the true side and the values that go on the false
7194 // side.
7195 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7196 for (SelectInst *SI : ASI) {
7197 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7198 TrueInstrs.push_back(cast<Instruction>(V));
7199 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7200 FalseInstrs.push_back(cast<Instruction>(V));
7201 }
7202
7203 // Split the select block, according to how many (if any) values go on each
7204 // side.
7205 BasicBlock *StartBlock = SI->getParent();
7206 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7207 // We should split before any debug-info.
7208 SplitPt.setHeadBit(true);
7209
7210 IRBuilder<> IB(SI);
7211 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7212
7213 BasicBlock *TrueBlock = nullptr;
7214 BasicBlock *FalseBlock = nullptr;
7215 BasicBlock *EndBlock = nullptr;
7216 BranchInst *TrueBranch = nullptr;
7217 BranchInst *FalseBranch = nullptr;
7218 if (TrueInstrs.size() == 0) {
7219 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7220 CondFr, SplitPt, false, nullptr, nullptr, LI));
7221 FalseBlock = FalseBranch->getParent();
7222 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7223 } else if (FalseInstrs.size() == 0) {
7224 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7225 CondFr, SplitPt, false, nullptr, nullptr, LI));
7226 TrueBlock = TrueBranch->getParent();
7227 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7228 } else {
7229 Instruction *ThenTerm = nullptr;
7230 Instruction *ElseTerm = nullptr;
7231 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7232 nullptr, nullptr, LI);
7233 TrueBranch = cast<BranchInst>(ThenTerm);
7234 FalseBranch = cast<BranchInst>(ElseTerm);
7235 TrueBlock = TrueBranch->getParent();
7236 FalseBlock = FalseBranch->getParent();
7237 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7238 }
7239
7240 EndBlock->setName("select.end");
7241 if (TrueBlock)
7242 TrueBlock->setName("select.true.sink");
7243 if (FalseBlock)
7244 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7245 : "select.false.sink");
7246
7247 if (IsHugeFunc) {
7248 if (TrueBlock)
7249 FreshBBs.insert(TrueBlock);
7250 if (FalseBlock)
7251 FreshBBs.insert(FalseBlock);
7252 FreshBBs.insert(EndBlock);
7253 }
7254
7255 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7256
7257 static const unsigned MD[] = {
7258 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7259 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7260 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7261
7262 // Sink expensive instructions into the conditional blocks to avoid executing
7263 // them speculatively.
7264 for (Instruction *I : TrueInstrs)
7265 I->moveBefore(TrueBranch);
7266 for (Instruction *I : FalseInstrs)
7267 I->moveBefore(FalseBranch);
7268
7269 // If we did not create a new block for one of the 'true' or 'false' paths
7270 // of the condition, it means that side of the branch goes to the end block
7271 // directly and the path originates from the start block from the point of
7272 // view of the new PHI.
7273 if (TrueBlock == nullptr)
7274 TrueBlock = StartBlock;
7275 else if (FalseBlock == nullptr)
7276 FalseBlock = StartBlock;
7277
7278 SmallPtrSet<const Instruction *, 2> INS;
7279 INS.insert(ASI.begin(), ASI.end());
7280 // Use reverse iterator because later select may use the value of the
7281 // earlier select, and we need to propagate value through earlier select
7282 // to get the PHI operand.
7283 for (SelectInst *SI : llvm::reverse(ASI)) {
7284 // The select itself is replaced with a PHI Node.
7285 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7286 PN->insertBefore(EndBlock->begin());
7287 PN->takeName(SI);
7288 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7289 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7290 PN->setDebugLoc(SI->getDebugLoc());
7291
7292 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7293 SI->eraseFromParent();
7294 INS.erase(SI);
7295 ++NumSelectsExpanded;
7296 }
7297
7298 // Instruct OptimizeBlock to skip to the next block.
7299 CurInstIterator = StartBlock->end();
7300 return true;
7301}
7302
7303/// Some targets only accept certain types for splat inputs. For example a VDUP
7304/// in MVE takes a GPR (integer) register, and the instructions that incorporate
7305/// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
7306bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7307 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7308 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7309 m_Undef(), m_ZeroMask())))
7310 return false;
7311 Type *NewType = TLI->shouldConvertSplatType(SVI);
7312 if (!NewType)
7313 return false;
7314
7315 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7316 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7317 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7318 "Expected a type of the same size!");
7319 auto *NewVecType =
7320 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7321
7322 // Create a bitcast (shuffle (insert (bitcast(..))))
7323 IRBuilder<> Builder(SVI->getContext());
7324 Builder.SetInsertPoint(SVI);
7325 Value *BC1 = Builder.CreateBitCast(
7326 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7327 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7328 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7329
7330 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7331 RecursivelyDeleteTriviallyDeadInstructions(
7332 SVI, TLInfo, nullptr,
7333 [&](Value *V) { removeAllAssertingVHReferences(V); });
7334
7335 // Also hoist the bitcast up to its operand if they are not in the same
7336 // block.
7337 if (auto *BCI = dyn_cast<Instruction>(BC1))
7338 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7339 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7340 !Op->isTerminator() && !Op->isEHPad())
7341 BCI->moveAfter(Op);
7342
7343 return true;
7344}
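// A minimal sketch (hypothetical target where shouldConvertSplatType returns
// i32 for a <4 x float> splat):
//   %ins   = insertelement <4 x float> poison, float %v, i64 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
//                          <4 x i32> zeroinitializer
// becomes a splat of the bitcast scalar, bitcast back to the original type:
//   %v.bc = bitcast float %v to i32
//   ...integer splat of %v.bc..., then a bitcast of <4 x i32> back to <4 x float>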
7345
7346bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7347 // If the operands of I can be folded into a target instruction together with
7348 // I, duplicate and sink them.
7349 SmallVector<Use *, 4> OpsToSink;
7350 if (!TLI->shouldSinkOperands(I, OpsToSink))
7351 return false;
7352
7353 // OpsToSink can contain multiple uses in a use chain (e.g.
7354 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7355 // uses must come first, so we process the ops in reverse order so as to not
7356 // create invalid IR.
7357 BasicBlock *TargetBB = I->getParent();
7358 bool Changed = false;
7359 SmallVector<Use *, 4> ToReplace;
7360 Instruction *InsertPoint = I;
7361 DenseMap<const Instruction *, unsigned long> InstOrdering;
7362 unsigned long InstNumber = 0;
7363 for (const auto &I : *TargetBB)
7364 InstOrdering[&I] = InstNumber++;
7365
7366 for (Use *U : reverse(OpsToSink)) {
7367 auto *UI = cast<Instruction>(U->get());
7368 if (isa<PHINode>(UI))
7369 continue;
7370 if (UI->getParent() == TargetBB) {
7371 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7372 InsertPoint = UI;
7373 continue;
7374 }
7375 ToReplace.push_back(U);
7376 }
7377
7378 SetVector<Instruction *> MaybeDead;
7379 DenseMap<Instruction *, Instruction *> NewInstructions;
7380 for (Use *U : ToReplace) {
7381 auto *UI = cast<Instruction>(U->get());
7382 Instruction *NI = UI->clone();
7383
7384 if (IsHugeFunc) {
7385 // Now that we have cloned an instruction, its operands' defs may be able
7386 // to sink into this BB too, so we add those defs' BBs to FreshBBs for optimization.
7387 for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
7388 auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
7389 if (!OpDef)
7390 continue;
7391 FreshBBs.insert(OpDef->getParent());
7392 }
7393 }
7394
7395 NewInstructions[UI] = NI;
7396 MaybeDead.insert(UI);
7397 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7398 NI->insertBefore(InsertPoint);
7399 InsertPoint = NI;
7400 InsertedInsts.insert(NI);
7401
7402 // Update the use for the new instruction, making sure that we update the
7403 // sunk instruction uses, if it is part of a chain that has already been
7404 // sunk.
7405 Instruction *OldI = cast<Instruction>(U->getUser());
7406 if (NewInstructions.count(OldI))
7407 NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
7408 else
7409 U->set(NI);
7410 Changed = true;
7411 }
7412
7413 // Remove instructions that are dead after sinking.
7414 for (auto *I : MaybeDead) {
7415 if (!I->hasNUsesOrMore(1)) {
7416 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7417 I->eraseFromParent();
7418 }
7419 }
7420
7421 return Changed;
7422}
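// A minimal sketch of operand sinking (typical of widening multiplies on
// targets whose shouldSinkOperands asks for the extends to be sunk):
//   bb0:
//     %ae = sext <8 x i8> %a to <8 x i16>
//     %be = sext <8 x i8> %b to <8 x i16>
//   bb1:
//     %mul = mul <8 x i16> %ae, %be
// The sexts are cloned into bb1 next to %mul so isel can form a single
// widening multiply; the originals are erased if they become dead.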
7423
7424bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7425 Value *Cond = SI->getCondition();
7426 Type *OldType = Cond->getType();
7427 LLVMContext &Context = Cond->getContext();
7428 EVT OldVT = TLI->getValueType(*DL, OldType);
7429 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7430 unsigned RegWidth = RegType.getSizeInBits();
7431
7432 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7433 return false;
7434
7435 // If the register width is greater than the type width, expand the condition
7436 // of the switch instruction and each case constant to the width of the
7437 // register. By widening the type of the switch condition, subsequent
7438 // comparisons (for case comparisons) will not need to be extended to the
7439 // preferred register width, so we will potentially eliminate N-1 extends,
7440 // where N is the number of cases in the switch.
7441 auto *NewType = Type::getIntNTy(Context, RegWidth);
7442
7443 // Extend the switch condition and case constants using the target preferred
7444 // extend unless the switch condition is a function argument with an extend
7445 // attribute. In that case, we can avoid an unnecessary mask/extension by
7446 // matching the argument extension instead.
7447 Instruction::CastOps ExtType = Instruction::ZExt;
7448 // Some targets prefer SExt over ZExt.
7449 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7450 ExtType = Instruction::SExt;
7451
7452 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7453 if (Arg->hasSExtAttr())
7454 ExtType = Instruction::SExt;
7455 if (Arg->hasZExtAttr())
7456 ExtType = Instruction::ZExt;
7457 }
7458
7459 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7460 ExtInst->insertBefore(SI);
7461 ExtInst->setDebugLoc(SI->getDebugLoc());
7462 SI->setCondition(ExtInst);
7463 for (auto Case : SI->cases()) {
7464 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7465 APInt WideConst = (ExtType == Instruction::ZExt)
7466 ? NarrowConst.zext(RegWidth)
7467 : NarrowConst.sext(RegWidth);
7468 Case.setValue(ConstantInt::get(Context, WideConst));
7469 }
7470
7471 return true;
7472}
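// A minimal sketch (assuming the preferred switch condition register is 64-bit
// and ZExt is the chosen extension):
//   switch i32 %c, label %def [ i32 1, label %a
//                               i32 2, label %b ]
// becomes:
//   %c.wide = zext i32 %c to i64
//   switch i64 %c.wide, label %def [ i64 1, label %a
//                                    i64 2, label %b ]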
7473
7474bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7475 // The SCCP optimization tends to produce code like this:
7476 // switch(x) { case 42: phi(42, ...) }
7477 // Materializing the constant for the phi-argument needs instructions; so we
7478 // change the code to:
7479 // switch(x) { case 42: phi(x, ...) }
7480
7481 Value *Condition = SI->getCondition();
7482 // Avoid endless loop in degenerate case.
7483 if (isa<ConstantInt>(*Condition))
7484 return false;
7485
7486 bool Changed = false;
7487 BasicBlock *SwitchBB = SI->getParent();
7488 Type *ConditionType = Condition->getType();
7489
7490 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7491 ConstantInt *CaseValue = Case.getCaseValue();
7492 BasicBlock *CaseBB = Case.getCaseSuccessor();
7493 // Set to true if we previously checked that `CaseBB` is only reached by
7494 // a single case from this switch.
7495 bool CheckedForSinglePred = false;
7496 for (PHINode &PHI : CaseBB->phis()) {
7497 Type *PHIType = PHI.getType();
7498 // If ZExt is free then we can also catch patterns like this:
7499 // switch((i32)x) { case 42: phi((i64)42, ...); }
7500 // and replace `(i64)42` with `zext i32 %x to i64`.
7501 bool TryZExt =
7502 PHIType->isIntegerTy() &&
7503 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7504 TLI->isZExtFree(ConditionType, PHIType);
7505 if (PHIType == ConditionType || TryZExt) {
7506 // Set to true to skip this case because of multiple preds.
7507 bool SkipCase = false;
7508 Value *Replacement = nullptr;
7509 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7510 Value *PHIValue = PHI.getIncomingValue(I);
7511 if (PHIValue != CaseValue) {
7512 if (!TryZExt)
7513 continue;
7514 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7515 if (!PHIValueInt ||
7516 PHIValueInt->getValue() !=
7517 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7518 continue;
7519 }
7520 if (PHI.getIncomingBlock(I) != SwitchBB)
7521 continue;
7522 // We cannot optimize if there are multiple case labels jumping to
7523 // this block. This check may get expensive when there are many
7524 // case labels so we test for it last.
7525 if (!CheckedForSinglePred) {
7526 CheckedForSinglePred = true;
7527 if (SI->findCaseDest(CaseBB) == nullptr) {
7528 SkipCase = true;
7529 break;
7530 }
7531 }
7532
7533 if (Replacement == nullptr) {
7534 if (PHIValue == CaseValue) {
7535 Replacement = Condition;
7536 } else {
7537 IRBuilder<> Builder(SI);
7538 Replacement = Builder.CreateZExt(Condition, PHIType);
7539 }
7540 }
7541 PHI.setIncomingValue(I, Replacement);
7542 Changed = true;
7543 }
7544 if (SkipCase)
7545 break;
7546 }
7547 }
7548 }
7549 return Changed;
7550}
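// A minimal sketch of the phi-constant replacement (zext variant, assuming
// zext i32 -> i64 is free on the target):
//   switch i32 %x, label %def [ i32 42, label %bb ]
// bb:
//   %p = phi i64 [ 42, %switchbb ], [ %q, %other ]
// becomes:
//   %x.wide = zext i32 %x to i64   ; inserted before the switch
//   %p = phi i64 [ %x.wide, %switchbb ], [ %q, %other ]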
7551
7552bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7553 bool Changed = optimizeSwitchType(SI);
7554 Changed |= optimizeSwitchPhiConstants(SI);
7555 return Changed;
7556}
7557
7558namespace {
7559
7560/// Helper class to promote a scalar operation to a vector one.
7561/// This class is used to move an extractelement transition downward.
7562/// E.g.,
7563/// a = vector_op <2 x i32>
7564/// b = extractelement <2 x i32> a, i32 0
7565/// c = scalar_op b
7566/// store c
7567///
7568/// =>
7569/// a = vector_op <2 x i32>
7570/// c = vector_op a (equivalent to scalar_op on the related lane)
7571/// * d = extractelement <2 x i32> c, i32 0
7572/// * store d
7573/// Assuming both extractelement and store can be combined, we get rid of the
7574/// transition.
7575class VectorPromoteHelper {
7576 /// DataLayout associated with the current module.
7577 const DataLayout &DL;
7578
7579 /// Used to perform some checks on the legality of vector operations.
7580 const TargetLowering &TLI;
7581
7582 /// Used to estimate the cost of the promoted chain.
7583 const TargetTransformInfo &TTI;
7584
7585 /// The transition being moved downwards.
7586 Instruction *Transition;
7587
7588 /// The sequence of instructions to be promoted.
7589 SmallVector<Instruction *, 4> InstsToBePromoted;
7590
7591 /// Cost of combining a store and an extract.
7592 unsigned StoreExtractCombineCost;
7593
7594 /// Instruction that will be combined with the transition.
7595 Instruction *CombineInst = nullptr;
7596
7597 /// The instruction that represents the current end of the transition.
7598 /// Since we are faking the promotion until we reach the end of the chain
7599 /// of computation, we need a way to get the current end of the transition.
7600 Instruction *getEndOfTransition() const {
7601 if (InstsToBePromoted.empty())
7602 return Transition;
7603 return InstsToBePromoted.back();
7604 }
7605
7606 /// Return the index of the original value in the transition.
7607 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7608 /// c, is at index 0.
7609 unsigned getTransitionOriginalValueIdx() const {
7610 assert(isa<ExtractElementInst>(Transition) &&
7611 "Other kind of transitions are not supported yet");
7612 return 0;
7613 }
7614
7615 /// Return the index of the index in the transition.
7616 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
7617 /// is at index 1.
7618 unsigned getTransitionIdx() const {
7619 assert(isa<ExtractElementInst>(Transition) &&
7620 "Other kind of transitions are not supported yet");
7621 return 1;
7622 }
7623
7624 /// Get the type of the transition.
7625 /// This is the type of the original value.
7626 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
7627 /// transition is <2 x i32>.
7628 Type *getTransitionType() const {
7629 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
7630 }
7631
7632 /// Promote \p ToBePromoted by moving \p Def downward through it.
7633 /// I.e., we have the following sequence:
7634 /// Def = Transition <ty1> a to <ty2>
7635 /// b = ToBePromoted <ty2> Def, ...
7636 /// =>
7637 /// b = ToBePromoted <ty1> a, ...
7638 /// Def = Transition <ty1> ToBePromoted to <ty2>
7639 void promoteImpl(Instruction *ToBePromoted);
7640
7641 /// Check whether or not it is profitable to promote all the
7642 /// instructions enqueued to be promoted.
7643 bool isProfitableToPromote() {
7644 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
7645 unsigned Index = isa<ConstantInt>(ValIdx)
7646 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7647 : -1;
7648 Type *PromotedType = getTransitionType();
7649
7650 StoreInst *ST = cast<StoreInst>(CombineInst);
7651 unsigned AS = ST->getPointerAddressSpace();
7652 // Check if this store is supported.
7653    if (!TLI.allowsMisalignedMemoryAccesses(
7654            TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
7655 ST->getAlign())) {
7656 // If this is not supported, there is no way we can combine
7657 // the extract with the store.
7658 return false;
7659 }
7660
7661 // The scalar chain of computation has to pay for the transition
7662 // scalar to vector.
7663 // The vector chain has to account for the combining cost.
7664    TargetTransformInfo::TargetCostKind CostKind =
7665        TargetTransformInfo::TCK_RecipThroughput;
7666    InstructionCost ScalarCost =
7667 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
7668 InstructionCost VectorCost = StoreExtractCombineCost;
7669 for (const auto &Inst : InstsToBePromoted) {
7670 // Compute the cost.
7671 // By construction, all instructions being promoted are arithmetic ones.
7672 // Moreover, one argument is a constant that can be viewed as a splat
7673 // constant.
7674 Value *Arg0 = Inst->getOperand(0);
7675 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7676 isa<ConstantFP>(Arg0);
7677 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
7678      if (IsArg0Constant)
7679        Arg1Info = TTI.getOperandInfo(Inst->getOperand(1));
7680      else
7681        Arg0Info = TTI.getOperandInfo(Arg0);
7682
7683 ScalarCost += TTI.getArithmeticInstrCost(
7684 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
7685 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
7686 CostKind, Arg0Info, Arg1Info);
7687 }
7688 LLVM_DEBUG(
7689 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
7690 << ScalarCost << "\nVector: " << VectorCost << '\n');
7691 return ScalarCost > VectorCost;
7692 }
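  // For illustration, a worked comparison with made-up costs (not taken from
  // any real target cost table): suppose the chain is a single 'add' promoted
  // over a <2 x i32> transition, the extract costs 1, the scalar add costs 1,
  // the vector add costs 1 and StoreExtractCombineCost is 1. Then ScalarCost =
  // 1 + 1 = 2 and VectorCost = 1 + 1 = 2, and promotion is rejected because
  // ScalarCost must be strictly greater than VectorCost.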
7693
7694 /// Generate a constant vector with \p Val with the same
7695 /// number of elements as the transition.
7696 /// \p UseSplat defines whether or not \p Val should be replicated
7697 /// across the whole vector.
7698 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
7699 /// otherwise we generate a vector with as many undef as possible:
7700 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
7701 /// used at the index of the extract.
7702 Value *getConstantVector(Constant *Val, bool UseSplat) const {
7703 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
7704 if (!UseSplat) {
7705 // If we cannot determine where the constant must be, we have to
7706 // use a splat constant.
7707 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
7708 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
7709 ExtractIdx = CstVal->getSExtValue();
7710 else
7711 UseSplat = true;
7712 }
7713
7714 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
7715 if (UseSplat)
7716 return ConstantVector::getSplat(EC, Val);
7717
7718 if (!EC.isScalable()) {
7719      SmallVector<Constant *, 4> ConstVec;
7720      UndefValue *UndefVal = UndefValue::get(Val->getType());
7721 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
7722 if (Idx == ExtractIdx)
7723 ConstVec.push_back(Val);
7724 else
7725 ConstVec.push_back(UndefVal);
7726 }
7727 return ConstantVector::get(ConstVec);
7728 } else
7729      llvm_unreachable(
7730          "Generate scalable vector for non-splat is unimplemented");
7731 }
7732
7733 /// Check if promoting to a vector type an operand at \p OperandIdx
7734 /// in \p Use can trigger undefined behavior.
7735 static bool canCauseUndefinedBehavior(const Instruction *Use,
7736 unsigned OperandIdx) {
7737    // It is not safe to introduce undef when the operand is on
7738 // the right hand side of a division-like instruction.
7739 if (OperandIdx != 1)
7740 return false;
7741 switch (Use->getOpcode()) {
7742 default:
7743 return false;
7744 case Instruction::SDiv:
7745 case Instruction::UDiv:
7746 case Instruction::SRem:
7747 case Instruction::URem:
7748 return true;
7749 case Instruction::FDiv:
7750 case Instruction::FRem:
7751 return !Use->hasNoNaNs();
7752 }
7753 llvm_unreachable(nullptr);
7754 }
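  // For example, widening the divisor of a udiv must not pad the extra lanes
  // with undef: shouldPromote() bails out when the transition itself is the
  // divisor, and getConstantVector() falls back to a splat when a constant
  // divisor is widened.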
7755
7756public:
7757 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
7758 const TargetTransformInfo &TTI, Instruction *Transition,
7759 unsigned CombineCost)
7760 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
7761 StoreExtractCombineCost(CombineCost) {
7762 assert(Transition && "Do not know how to promote null");
7763 }
7764
7765 /// Check if we can promote \p ToBePromoted to \p Type.
7766 bool canPromote(const Instruction *ToBePromoted) const {
7767 // We could support CastInst too.
7768 return isa<BinaryOperator>(ToBePromoted);
7769 }
7770
7771 /// Check if it is profitable to promote \p ToBePromoted
7772  /// by moving the transition downward through it.
7773 bool shouldPromote(const Instruction *ToBePromoted) const {
7774 // Promote only if all the operands can be statically expanded.
7775 // Indeed, we do not want to introduce any new kind of transitions.
7776 for (const Use &U : ToBePromoted->operands()) {
7777 const Value *Val = U.get();
7778 if (Val == getEndOfTransition()) {
7779 // If the use is a division and the transition is on the rhs,
7780 // we cannot promote the operation, otherwise we may create a
7781 // division by zero.
7782 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
7783 return false;
7784 continue;
7785 }
7786 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
7787 !isa<ConstantFP>(Val))
7788 return false;
7789 }
7790 // Check that the resulting operation is legal.
7791 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
7792 if (!ISDOpcode)
7793 return false;
7794 return StressStoreExtract ||
7795           TLI.isOperationLegalOrCustom(
7796               ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
7797 }
7798
7799 /// Check whether or not \p Use can be combined
7800 /// with the transition.
7801 /// I.e., is it possible to do Use(Transition) => AnotherUse?
7802 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
7803
7804 /// Record \p ToBePromoted as part of the chain to be promoted.
7805 void enqueueForPromotion(Instruction *ToBePromoted) {
7806 InstsToBePromoted.push_back(ToBePromoted);
7807 }
7808
7809 /// Set the instruction that will be combined with the transition.
7810 void recordCombineInstruction(Instruction *ToBeCombined) {
7811 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
7812 CombineInst = ToBeCombined;
7813 }
7814
7815  /// Promote all the instructions enqueued for promotion if it is
7816  /// profitable.
7817 /// \return True if the promotion happened, false otherwise.
7818 bool promote() {
7819 // Check if there is something to promote.
7820 // Right now, if we do not have anything to combine with,
7821 // we assume the promotion is not profitable.
7822 if (InstsToBePromoted.empty() || !CombineInst)
7823 return false;
7824
7825 // Check cost.
7826 if (!StressStoreExtract && !isProfitableToPromote())
7827 return false;
7828
7829 // Promote.
7830 for (auto &ToBePromoted : InstsToBePromoted)
7831 promoteImpl(ToBePromoted);
7832 InstsToBePromoted.clear();
7833 return true;
7834 }
7835};
7836
7837} // end anonymous namespace
7838
7839void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
7840 // At this point, we know that all the operands of ToBePromoted but Def
7841 // can be statically promoted.
7842 // For Def, we need to use its parameter in ToBePromoted:
7843 // b = ToBePromoted ty1 a
7844 // Def = Transition ty1 b to ty2
7845 // Move the transition down.
7846 // 1. Replace all uses of the promoted operation by the transition.
7847 // = ... b => = ... Def.
7848 assert(ToBePromoted->getType() == Transition->getType() &&
7849 "The type of the result of the transition does not match "
7850 "the final type");
7851 ToBePromoted->replaceAllUsesWith(Transition);
7852 // 2. Update the type of the uses.
7853 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
7854 Type *TransitionTy = getTransitionType();
7855 ToBePromoted->mutateType(TransitionTy);
7856 // 3. Update all the operands of the promoted operation with promoted
7857 // operands.
7858 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
7859 for (Use &U : ToBePromoted->operands()) {
7860 Value *Val = U.get();
7861 Value *NewVal = nullptr;
7862 if (Val == Transition)
7863 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
7864 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
7865 isa<ConstantFP>(Val)) {
7866 // Use a splat constant if it is not safe to use undef.
7867 NewVal = getConstantVector(
7868 cast<Constant>(Val),
7869 isa<UndefValue>(Val) ||
7870 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
7871 } else
7872      llvm_unreachable("Did you modify shouldPromote and forget to update "
7873 "this?");
7874 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
7875 }
7876 Transition->moveAfter(ToBePromoted);
7877 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
7878}
7879
7880/// Some targets can do store(extractelement) with one instruction.
7881/// Try to push the extractelement towards the stores when the target
7882/// has this feature and this is profitable.
7883bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
7884 unsigned CombineCost = std::numeric_limits<unsigned>::max();
7885 if (DisableStoreExtract ||
7886      (!StressStoreExtract &&
7887       !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
7888                                       Inst->getOperand(1), CombineCost)))
7889 return false;
7890
7891 // At this point we know that Inst is a vector to scalar transition.
7892 // Try to move it down the def-use chain, until:
7893 // - We can combine the transition with its single use
7894 // => we got rid of the transition.
7895 // - We escape the current basic block
7896  //    => we would need to check that we are moving it to a cheaper place and
7897 // we do not do that for now.
7898 BasicBlock *Parent = Inst->getParent();
7899 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
7900 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
7901 // If the transition has more than one use, assume this is not going to be
7902 // beneficial.
7903 while (Inst->hasOneUse()) {
7904 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
7905 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
7906
7907 if (ToBePromoted->getParent() != Parent) {
7908 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
7909 << ToBePromoted->getParent()->getName()
7910 << ") than the transition (" << Parent->getName()
7911 << ").\n");
7912 return false;
7913 }
7914
7915 if (VPH.canCombine(ToBePromoted)) {
7916 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
7917 << "will be combined with: " << *ToBePromoted << '\n');
7918 VPH.recordCombineInstruction(ToBePromoted);
7919 bool Changed = VPH.promote();
7920 NumStoreExtractExposed += Changed;
7921 return Changed;
7922 }
7923
7924 LLVM_DEBUG(dbgs() << "Try promoting.\n");
7925 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
7926 return false;
7927
7928 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
7929
7930 VPH.enqueueForPromotion(ToBePromoted);
7931 Inst = ToBePromoted;
7932 }
7933 return false;
7934}
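// For illustration, a hypothetical chain this routine can hand to
// VectorPromoteHelper (assuming the target reports that the store and the
// extract can be combined and that the vector 'add' is legal):
//   %e = extractelement <2 x i32> %v, i32 1
//   %a = add i32 %e, 7
//   store i32 %a, ptr %p
// After promotion the add is performed on the vector and the extract is moved
// next to the store, where the two can be matched together:
//   %va = add <2 x i32> %v, <i32 undef, i32 7>
//   %e2 = extractelement <2 x i32> %va, i32 1
//   store i32 %e2, ptr %p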
7935
7936/// For the instruction sequence of store below, F and I values
7937/// are bundled together as an i64 value before being stored into memory.
7938/// Sometimes it is more efficient to generate separate stores for F and I,
7939/// which can remove the bitwise instructions or sink them to colder places.
7940///
7941/// (store (or (zext (bitcast F to i32) to i64),
7942/// (shl (zext I to i64), 32)), addr) -->
7943/// (store F, addr) and (store I, addr+4)
7944///
7945/// Similarly, splitting for other merged stores can also be beneficial, like:
7946/// For pair of {i32, i32}, i64 store --> two i32 stores.
7947/// For pair of {i32, i16}, i64 store --> two i32 stores.
7948/// For pair of {i16, i16}, i32 store --> two i16 stores.
7949/// For pair of {i16, i8}, i32 store --> two i16 stores.
7950/// For pair of {i8, i8}, i16 store --> two i8 stores.
7951///
7952/// We allow each target to determine specifically which kind of splitting is
7953/// supported.
7954///
7955/// The store patterns are commonly seen from the simple code snippet below
7956/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
7957/// void goo(const std::pair<int, float> &);
7958/// hoo() {
7959/// ...
7960/// goo(std::make_pair(tmp, ftmp));
7961/// ...
7962/// }
7963///
7964/// Although we already have similar splitting in DAG Combine, we duplicate
7965/// it in CodeGenPrepare to catch the case in which the pattern spans
7966/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
7967/// during code expansion.
7968static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
7969                                const TargetLowering &TLI) {
7970 // Handle simple but common cases only.
7971 Type *StoreType = SI.getValueOperand()->getType();
7972
7973 // The code below assumes shifting a value by <number of bits>,
7974 // whereas scalable vectors would have to be shifted by
7975  // <log2(vscale) + number of bits> in order to store the
7976 // low/high parts. Bailing out for now.
7977 if (StoreType->isScalableTy())
7978 return false;
7979
7980 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
7981 DL.getTypeSizeInBits(StoreType) == 0)
7982 return false;
7983
7984 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
7985 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
7986 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
7987 return false;
7988
7989 // Don't split the store if it is volatile.
7990 if (SI.isVolatile())
7991 return false;
7992
7993 // Match the following patterns:
7994 // (store (or (zext LValue to i64),
7995 // (shl (zext HValue to i64), 32)), HalfValBitSize)
7996 // or
7997  // (store (or (shl (zext HValue to i64), 32),
7998  //            (zext LValue to i64)), HalfValBitSize)
7999  // Expect both operands of the OR and the first operand of the SHL to have
8000  // only one use.
8001 Value *LValue, *HValue;
8002 if (!match(SI.getValueOperand(),
8003             m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8004                    m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8005                                   m_SpecificInt(HalfValBitSize))))))
8006 return false;
8007
8008  // Check that LValue and HValue are integers with size less than or equal to
8009  // HalfValBitSize.
8009 if (!LValue->getType()->isIntegerTy() ||
8010 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8011 !HValue->getType()->isIntegerTy() ||
8012 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8013 return false;
8014
8015 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8016 // as the input of target query.
8017 auto *LBC = dyn_cast<BitCastInst>(LValue);
8018 auto *HBC = dyn_cast<BitCastInst>(HValue);
8019 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8020 : EVT::getEVT(LValue->getType());
8021 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8022 : EVT::getEVT(HValue->getType());
8023 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8024 return false;
8025
8026 // Start to split store.
8027 IRBuilder<> Builder(SI.getContext());
8028 Builder.SetInsertPoint(&SI);
8029
8030 // If LValue/HValue is a bitcast in another BB, create a new one in current
8031  // BB so it may be merged with the split stores by the DAG combiner.
8032 if (LBC && LBC->getParent() != SI.getParent())
8033 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8034 if (HBC && HBC->getParent() != SI.getParent())
8035 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8036
8037 bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
8038 auto CreateSplitStore = [&](Value *V, bool Upper) {
8039 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8040 Value *Addr = SI.getPointerOperand();
8041 Align Alignment = SI.getAlign();
8042 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8043 if (IsOffsetStore) {
8044 Addr = Builder.CreateGEP(
8045 SplitStoreType, Addr,
8046 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8047
8048 // When splitting the store in half, naturally one half will retain the
8049 // alignment of the original wider store, regardless of whether it was
8050 // over-aligned or not, while the other will require adjustment.
8051 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8052 }
8053 Builder.CreateAlignedStore(V, Addr, Alignment);
8054 };
8055
8056 CreateSplitStore(LValue, false);
8057 CreateSplitStore(HValue, true);
8058
8059 // Delete the old store.
8060 SI.eraseFromParent();
8061 return true;
8062}
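// For illustration only (hypothetical IR, a little-endian target that answers
// isMultiStoresCheaperThanBitsMerge in the affirmative, HalfValBitSize == 32):
//   %zl = zext i32 %lo to i64
//   %zh = zext i32 %hi to i64
//   %sh = shl i64 %zh, 32
//   %or = or i64 %zl, %sh
//   store i64 %or, ptr %p
// is rewritten into two half-width stores:
//   store i32 %lo, ptr %p
//   %p1 = getelementptr i32, ptr %p, i32 1
//   store i32 %hi, ptr %p1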
8063
8064// Return true if the GEP has two operands, the first operand is of a sequential
8065// type, and the second operand is a constant.
8066static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8067  gep_type_iterator I = gep_type_begin(*GEP);
8068  return GEP->getNumOperands() == 2 && I.isSequential() &&
8069 isa<ConstantInt>(GEP->getOperand(1));
8070}
8071
8072// Try unmerging GEPs to reduce liveness interference (register pressure) across
8073// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8074// reducing liveness interference across those edges benefits global register
8075// allocation. Currently handles only certain cases.
8076//
8077// For example, unmerge %GEPI and %UGEPI as below.
8078//
8079// ---------- BEFORE ----------
8080// SrcBlock:
8081// ...
8082// %GEPIOp = ...
8083// ...
8084// %GEPI = gep %GEPIOp, Idx
8085// ...
8086// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8087// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8088// (* %GEPIOp is alive on the indirectbr edges only because it's used by
8089// %UGEPI)
8090//
8091// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8092// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8093// ...
8094//
8095// DstBi:
8096// ...
8097// %UGEPI = gep %GEPIOp, UIdx
8098// ...
8099// ---------------------------
8100//
8101// ---------- AFTER ----------
8102// SrcBlock:
8103// ... (same as above)
8104// (* %GEPI is still alive on the indirectbr edges)
8105// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8106// unmerging)
8107// ...
8108//
8109// DstBi:
8110// ...
8111// %UGEPI = gep %GEPI, (UIdx-Idx)
8112// ...
8113// ---------------------------
8114//
8115// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8116// no longer alive on them.
8117//
8118// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8119// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8120// not to disable further simplifications and optimizations as a result of GEP
8121// merging.
8122//
8123// Note this unmerging may increase the length of the data flow critical path
8124// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8125// between the register pressure and the length of data-flow critical
8126// path. Restricting this to the uncommon IndirectBr case would minimize the
8127// impact of potentially longer critical path, if any, and the impact on compile
8128// time.
8129static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8130                                             const TargetTransformInfo *TTI) {
8131 BasicBlock *SrcBlock = GEPI->getParent();
8132 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8133 // (non-IndirectBr) cases exit early here.
8134 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8135 return false;
8136 // Check that GEPI is a simple gep with a single constant index.
8137 if (!GEPSequentialConstIndexed(GEPI))
8138 return false;
8139 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8140 // Check that GEPI is a cheap one.
8141 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8142                         TargetTransformInfo::TCK_SizeAndLatency) >
8143      TargetTransformInfo::TCC_Basic)
8144    return false;
8145 Value *GEPIOp = GEPI->getOperand(0);
8146 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8147 if (!isa<Instruction>(GEPIOp))
8148 return false;
8149 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8150 if (GEPIOpI->getParent() != SrcBlock)
8151 return false;
8152 // Check that GEP is used outside the block, meaning it's alive on the
8153 // IndirectBr edge(s).
8154 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8155 if (auto *I = dyn_cast<Instruction>(Usr)) {
8156 if (I->getParent() != SrcBlock) {
8157 return true;
8158 }
8159 }
8160 return false;
8161 }))
8162 return false;
8163 // The second elements of the GEP chains to be unmerged.
8164 std::vector<GetElementPtrInst *> UGEPIs;
8165 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8166 // on IndirectBr edges.
8167 for (User *Usr : GEPIOp->users()) {
8168 if (Usr == GEPI)
8169 continue;
8170 // Check if Usr is an Instruction. If not, give up.
8171 if (!isa<Instruction>(Usr))
8172 return false;
8173 auto *UI = cast<Instruction>(Usr);
8174    // If Usr is in the same block as GEPIOp, that is fine; skip it.
8175 if (UI->getParent() == SrcBlock)
8176 continue;
8177 // Check if Usr is a GEP. If not, give up.
8178 if (!isa<GetElementPtrInst>(Usr))
8179 return false;
8180 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8181 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8182 // the pointer operand to it. If so, record it in the vector. If not, give
8183 // up.
8184 if (!GEPSequentialConstIndexed(UGEPI))
8185 return false;
8186 if (UGEPI->getOperand(0) != GEPIOp)
8187 return false;
8188 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8189 return false;
8190 if (GEPIIdx->getType() !=
8191 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8192 return false;
8193 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8194 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8195                           TargetTransformInfo::TCK_SizeAndLatency) >
8196        TargetTransformInfo::TCC_Basic)
8197      return false;
8198 UGEPIs.push_back(UGEPI);
8199 }
8200 if (UGEPIs.size() == 0)
8201 return false;
8202 // Check the materializing cost of (Uidx-Idx).
8203 for (GetElementPtrInst *UGEPI : UGEPIs) {
8204 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8205 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8206    InstructionCost ImmCost = TTI->getIntImmCost(
8207        NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8208 if (ImmCost > TargetTransformInfo::TCC_Basic)
8209 return false;
8210 }
8211 // Now unmerge between GEPI and UGEPIs.
8212 for (GetElementPtrInst *UGEPI : UGEPIs) {
8213 UGEPI->setOperand(0, GEPI);
8214 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8215 Constant *NewUGEPIIdx = ConstantInt::get(
8216 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8217 UGEPI->setOperand(1, NewUGEPIIdx);
8218 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8219 // inbounds to avoid UB.
8220 if (!GEPI->isInBounds()) {
8221 UGEPI->setIsInBounds(false);
8222 }
8223 }
8224 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8225 // alive on IndirectBr edges).
8226 assert(llvm::none_of(GEPIOp->users(),
8227 [&](User *Usr) {
8228 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8229 }) &&
8230 "GEPIOp is used outside SrcBlock");
8231 return true;
8232}
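// For illustration, with hypothetical constant indices: if %GEPI is
// "gep i8, %GEPIOp, 16" and %UGEPI in a destination block is
// "gep i8, %GEPIOp, 24", the loop above rewrites %UGEPI into
// "gep i8, %GEPI, 8" (24 - 16), so only %GEPI stays live across the
// indirectbr edges.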
8233
8234static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8235                           SmallSet<BasicBlock *, 32> &FreshBBs,
8236                           bool IsHugeFunc) {
8237 // Try and convert
8238 // %c = icmp ult %x, 8
8239 // br %c, bla, blb
8240 // %tc = lshr %x, 3
8241 // to
8242 // %tc = lshr %x, 3
8243 // %c = icmp eq %tc, 0
8244 // br %c, bla, blb
8245 // Creating the cmp to zero can be better for the backend, especially if the
8246 // lshr produces flags that can be used automatically.
8247 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8248 return false;
8249
8250 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8251 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8252 return false;
8253
8254 Value *X = Cmp->getOperand(0);
8255 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8256
8257 for (auto *U : X->users()) {
8258 Instruction *UI = dyn_cast<Instruction>(U);
8259 // A quick dominance check
8260 if (!UI ||
8261 (UI->getParent() != Branch->getParent() &&
8262 UI->getParent() != Branch->getSuccessor(0) &&
8263 UI->getParent() != Branch->getSuccessor(1)) ||
8264 (UI->getParent() != Branch->getParent() &&
8265         !UI->getParent()->getSinglePredecessor()))
8266      continue;
8267
8268 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8269 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8270 IRBuilder<> Builder(Branch);
8271 if (UI->getParent() != Branch->getParent())
8272 UI->moveBefore(Branch);
8273 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8274 ConstantInt::get(UI->getType(), 0));
8275 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8276 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8277 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8278 return true;
8279 }
8280 if (Cmp->isEquality() &&
8281 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8282 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
8283 IRBuilder<> Builder(Branch);
8284 if (UI->getParent() != Branch->getParent())
8285 UI->moveBefore(Branch);
8286 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8287 ConstantInt::get(UI->getType(), 0));
8288 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8289 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8290 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8291 return true;
8292 }
8293 }
8294 return false;
8295}
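// For illustration, the second (equality) form above turns a hypothetical
//   %c = icmp eq i32 %x, 10
//   ...
//   %s = sub i32 %x, 10
// into
//   %s = sub i32 %x, 10
//   %c = icmp eq i32 %s, 0
// so the subtract can both produce the value and set the flags consumed by the
// branch on targets that prefer comparisons against zero.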
8296
8297bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8298 bool AnyChange = false;
8299 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8300
8301 // Bail out if we inserted the instruction to prevent optimizations from
8302 // stepping on each other's toes.
8303 if (InsertedInsts.count(I))
8304 return AnyChange;
8305
8306 // TODO: Move into the switch on opcode below here.
8307 if (PHINode *P = dyn_cast<PHINode>(I)) {
8308 // It is possible for very late stage optimizations (such as SimplifyCFG)
8309 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8310 // trivial PHI, go ahead and zap it here.
8311 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8312 LargeOffsetGEPMap.erase(P);
8313 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8314 P->eraseFromParent();
8315 ++NumPHIsElim;
8316 return true;
8317 }
8318 return AnyChange;
8319 }
8320
8321 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8322 // If the source of the cast is a constant, then this should have
8323 // already been constant folded. The only reason NOT to constant fold
8324 // it is if something (e.g. LSR) was careful to place the constant
8325    // evaluation in a block other than the one that uses it (e.g. to hoist
8326 // the address of globals out of a loop). If this is the case, we don't
8327 // want to forward-subst the cast.
8328 if (isa<Constant>(CI->getOperand(0)))
8329 return AnyChange;
8330
8331 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8332 return true;
8333
8334 if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
8335        TLI->optimizeExtendOrTruncateConversion(
8336            I, LI->getLoopFor(I->getParent()), *TTI))
8337 return true;
8338
8339 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8340 /// Sink a zext or sext into its user blocks if the target type doesn't
8341 /// fit in one register
8342 if (TLI->getTypeAction(CI->getContext(),
8343 TLI->getValueType(*DL, CI->getType())) ==
8344 TargetLowering::TypeExpandInteger) {
8345 return SinkCast(CI);
8346 } else {
8347        if (TLI->optimizeExtendOrTruncateConversion(
8348                I, LI->getLoopFor(I->getParent()), *TTI))
8349 return true;
8350
8351 bool MadeChange = optimizeExt(I);
8352 return MadeChange | optimizeExtUses(I);
8353 }
8354 }
8355 return AnyChange;
8356 }
8357
8358 if (auto *Cmp = dyn_cast<CmpInst>(I))
8359 if (optimizeCmp(Cmp, ModifiedDT))
8360 return true;
8361
8362 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8363 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8364 bool Modified = optimizeLoadExt(LI);
8365 unsigned AS = LI->getPointerAddressSpace();
8366 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8367 return Modified;
8368 }
8369
8370 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8371 if (splitMergedValStore(*SI, *DL, *TLI))
8372 return true;
8373 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8374 unsigned AS = SI->getPointerAddressSpace();
8375 return optimizeMemoryInst(I, SI->getOperand(1),
8376 SI->getOperand(0)->getType(), AS);
8377 }
8378
8379 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8380 unsigned AS = RMW->getPointerAddressSpace();
8381 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8382 }
8383
8384 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8385 unsigned AS = CmpX->getPointerAddressSpace();
8386 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8387 CmpX->getCompareOperand()->getType(), AS);
8388 }
8389
8390 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8391
8392 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8393 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8394 return true;
8395
8396 // TODO: Move this into the switch on opcode - it handles shifts already.
8397 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8398 BinOp->getOpcode() == Instruction::LShr)) {
8399 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8400 if (CI && TLI->hasExtractBitsInsn())
8401 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8402 return true;
8403 }
8404
8405 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8406 if (GEPI->hasAllZeroIndices()) {
8407 /// The GEP operand must be a pointer, so must its result -> BitCast
8408 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8409 GEPI->getName(), GEPI->getIterator());
8410 NC->setDebugLoc(GEPI->getDebugLoc());
8411 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8412      RecursivelyDeleteTriviallyDeadInstructions(
8413          GEPI, TLInfo, nullptr,
8414 [&](Value *V) { removeAllAssertingVHReferences(V); });
8415 ++NumGEPsElim;
8416 optimizeInst(NC, ModifiedDT);
8417 return true;
8418 }
8419    if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
8420      return true;
8421 }
8422 }
8423
8424 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8425 // freeze(icmp a, const)) -> icmp (freeze a), const
8426 // This helps generate efficient conditional jumps.
8427 Instruction *CmpI = nullptr;
8428 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8429 CmpI = II;
8430 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8431 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8432
8433 if (CmpI && CmpI->hasOneUse()) {
8434 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8435 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8436 isa<ConstantPointerNull>(Op0);
8437 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8438 isa<ConstantPointerNull>(Op1);
8439 if (Const0 || Const1) {
8440 if (!Const0 || !Const1) {
8441 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
8442 F->takeName(FI);
8443 CmpI->setOperand(Const0 ? 1 : 0, F);
8444 }
8445 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8446 FI->eraseFromParent();
8447 return true;
8448 }
8449 }
8450 return AnyChange;
8451 }
8452
8453 if (tryToSinkFreeOperands(I))
8454 return true;
8455
8456 switch (I->getOpcode()) {
8457 case Instruction::Shl:
8458 case Instruction::LShr:
8459 case Instruction::AShr:
8460 return optimizeShiftInst(cast<BinaryOperator>(I));
8461 case Instruction::Call:
8462 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8463 case Instruction::Select:
8464 return optimizeSelectInst(cast<SelectInst>(I));
8465 case Instruction::ShuffleVector:
8466 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8467 case Instruction::Switch:
8468 return optimizeSwitchInst(cast<SwitchInst>(I));
8469 case Instruction::ExtractElement:
8470 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8471 case Instruction::Br:
8472 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8473 }
8474
8475 return AnyChange;
8476}
8477
8478/// Given an OR instruction, check to see if this is a bitreverse
8479/// idiom. If so, insert the new intrinsic and return true.
8480bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8481 if (!I.getType()->isIntegerTy() ||
8482      !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8483                                     TLI->getValueType(*DL, I.getType(), true)))
8484 return false;
8485
8485
8486  SmallVector<Instruction *, 4> Insts;
8487  if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8488 return false;
8489 Instruction *LastInst = Insts.back();
8490 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8491  RecursivelyDeleteTriviallyDeadInstructions(
8492      &I, TLInfo, nullptr,
8493 [&](Value *V) { removeAllAssertingVHReferences(V); });
8494 return true;
8495}
8496
8497// In this pass we look for GEP and cast instructions that are used
8498// across basic blocks and rewrite them to improve basic-block-at-a-time
8499// selection.
8500bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8501 SunkAddrs.clear();
8502 bool MadeChange = false;
8503
8504 do {
8505 CurInstIterator = BB.begin();
8506 ModifiedDT = ModifyDT::NotModifyDT;
8507 while (CurInstIterator != BB.end()) {
8508 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8509 if (ModifiedDT != ModifyDT::NotModifyDT) {
8510        // For huge functions we tend to quickly go through the inner optimization
8511        // opportunities in the BB. So we go back to the BB head to re-optimize
8512        // each instruction instead of going back to the function head.
8513 if (IsHugeFunc) {
8514 DT.reset();
8515 getDT(*BB.getParent());
8516 break;
8517 } else {
8518 return true;
8519 }
8520 }
8521 }
8522 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8523
8524 bool MadeBitReverse = true;
8525 while (MadeBitReverse) {
8526 MadeBitReverse = false;
8527 for (auto &I : reverse(BB)) {
8528 if (makeBitReverse(I)) {
8529 MadeBitReverse = MadeChange = true;
8530 break;
8531 }
8532 }
8533 }
8534 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8535
8536 return MadeChange;
8537}
8538
8539// Some CGP optimizations may move or alter what's computed in a block. Check
8540// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
8541bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
8542 assert(isa<DbgValueInst>(I));
8543 DbgValueInst &DVI = *cast<DbgValueInst>(I);
8544
8545 // Does this dbg.value refer to a sunk address calculation?
8546 bool AnyChange = false;
8547 SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
8548 DVI.location_ops().end());
8549 for (Value *Location : LocationOps) {
8550 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8551 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8552 if (SunkAddr) {
8553 // Point dbg.value at locally computed address, which should give the best
8554 // opportunity to be accurately lowered. This update may change the type
8555 // of pointer being referred to; however this makes no difference to
8556 // debugging information, and we can't generate bitcasts that may affect
8557 // codegen.
8558 DVI.replaceVariableLocationOp(Location, SunkAddr);
8559 AnyChange = true;
8560 }
8561 }
8562 return AnyChange;
8563}
8564
8565bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8566 bool AnyChange = false;
8567 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
8568 AnyChange |= fixupDbgVariableRecord(DVR);
8569 return AnyChange;
8570}
8571
8572// FIXME: should updating debug-info really cause the "changed" flag to fire,
8573// which can cause a function to be reprocessed?
8574bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8575 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8576 DVR.Type != DbgVariableRecord::LocationType::Assign)
8577 return false;
8578
8579 // Does this DbgVariableRecord refer to a sunk address calculation?
8580 bool AnyChange = false;
8581 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8582 DVR.location_ops().end());
8583 for (Value *Location : LocationOps) {
8584 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8585 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8586 if (SunkAddr) {
8587 // Point dbg.value at locally computed address, which should give the best
8588 // opportunity to be accurately lowered. This update may change the type
8589 // of pointer being referred to; however this makes no difference to
8590 // debugging information, and we can't generate bitcasts that may affect
8591 // codegen.
8592 DVR.replaceVariableLocationOp(Location, SunkAddr);
8593 AnyChange = true;
8594 }
8595 }
8596 return AnyChange;
8597}
8598
8599static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) {
8600  DVI->removeFromParent();
8601 if (isa<PHINode>(VI))
8602 DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
8603 else
8604 DVI->insertAfter(VI);
8605}
8606
8607static void DbgInserterHelper(DbgVariableRecord *DVR, Instruction *VI) {
8608  DVR->removeFromParent();
8609 BasicBlock *VIBB = VI->getParent();
8610 if (isa<PHINode>(VI))
8611 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8612 else
8613 VIBB->insertDbgRecordAfter(DVR, VI);
8614}
8615
8616// A llvm.dbg.value may be using a value before its definition, due to
8617// optimizations in this pass and others. Scan for such dbg.values, and rescue
8618// them by moving the dbg.value to immediately after the value definition.
8619// FIXME: Ideally this should never be necessary, and this has the potential
8620// to re-order dbg.value intrinsics.
8621bool CodeGenPrepare::placeDbgValues(Function &F) {
8622 bool MadeChange = false;
8623 DominatorTree DT(F);
8624
8625 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
8626    SmallVector<Instruction *, 4> VIs;
8627    for (Value *V : DbgItem->location_ops())
8628 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
8629 VIs.push_back(VI);
8630
8631 // This item may depend on multiple instructions, complicating any
8632 // potential sink. This block takes the defensive approach, opting to
8633 // "undef" the item if it has more than one instruction and any of them do
8634    // not dominate it.
8635 for (Instruction *VI : VIs) {
8636 if (VI->isTerminator())
8637 continue;
8638
8639 // If VI is a phi in a block with an EHPad terminator, we can't insert
8640 // after it.
8641 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
8642 continue;
8643
8644 // If the defining instruction dominates the dbg.value, we do not need
8645 // to move the dbg.value.
8646 if (DT.dominates(VI, Position))
8647 continue;
8648
8649 // If we depend on multiple instructions and any of them doesn't
8650 // dominate this DVI, we probably can't salvage it: moving it to
8651 // after any of the instructions could cause us to lose the others.
8652 if (VIs.size() > 1) {
8653 LLVM_DEBUG(
8654 dbgs()
8655 << "Unable to find valid location for Debug Value, undefing:\n"
8656 << *DbgItem);
8657 DbgItem->setKillLocation();
8658 break;
8659 }
8660
8661 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
8662 << *DbgItem << ' ' << *VI);
8663 DbgInserterHelper(DbgItem, VI);
8664 MadeChange = true;
8665 ++NumDbgValueMoved;
8666 }
8667 };
8668
8669 for (BasicBlock &BB : F) {
8670    for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
8671      // Process dbg.value intrinsics.
8672 DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
8673 if (DVI) {
8674 DbgProcessor(DVI, DVI);
8675 continue;
8676 }
8677
8678 // If this isn't a dbg.value, process any attached DbgVariableRecord
8679 // records attached to this instruction.
8680      for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
8681               filterDbgVars(Insn.getDbgRecordRange()))) {
8682 if (DVR.Type != DbgVariableRecord::LocationType::Value)
8683 continue;
8684 DbgProcessor(&DVR, &Insn);
8685 }
8686 }
8687 }
8688
8689 return MadeChange;
8690}
8691
8692// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
8693// probes can be chained dependencies of other regular DAG nodes and block DAG
8694// combine optimizations.
8695bool CodeGenPrepare::placePseudoProbes(Function &F) {
8696 bool MadeChange = false;
8697 for (auto &Block : F) {
8698    // Move the remaining probes to the beginning of the block.
8699 auto FirstInst = Block.getFirstInsertionPt();
8700 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
8701 ++FirstInst;
8702 BasicBlock::iterator I(FirstInst);
8703 I++;
8704 while (I != Block.end()) {
8705 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
8706 II->moveBefore(&*FirstInst);
8707 MadeChange = true;
8708 }
8709 }
8710 }
8711 return MadeChange;
8712}
8713
8714/// Scale down both weights to fit into uint32_t.
8715static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
8716 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
8717 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
8718 NewTrue = NewTrue / Scale;
8719 NewFalse = NewFalse / Scale;
8720}
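// For illustration, with made-up weights NewTrue = 10,000,000,000 and
// NewFalse = 6,000,000,000: NewMax / UINT32_MAX == 2, so Scale == 3 and the
// weights become roughly 3,333,333,333 and 2,000,000,000, both of which fit
// into uint32_t while preserving their ratio.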
8721
8722/// Some targets prefer to split a conditional branch like:
8723/// \code
8724/// %0 = icmp ne i32 %a, 0
8725/// %1 = icmp ne i32 %b, 0
8726/// %or.cond = or i1 %0, %1
8727/// br i1 %or.cond, label %TrueBB, label %FalseBB
8728/// \endcode
8729/// into multiple branch instructions like:
8730/// \code
8731/// bb1:
8732/// %0 = icmp ne i32 %a, 0
8733/// br i1 %0, label %TrueBB, label %bb2
8734/// bb2:
8735/// %1 = icmp ne i32 %b, 0
8736/// br i1 %1, label %TrueBB, label %FalseBB
8737/// \endcode
8738/// This usually allows instruction selection to do even further optimizations
8739/// and combine the compare with the branch instruction. Currently this is
8740/// applied for targets which have "cheap" jump instructions.
8741///
8742/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
8743///
8744bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
8745 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
8746 return false;
8747
8748 bool MadeChange = false;
8749 for (auto &BB : F) {
8750 // Does this BB end with the following?
8751 // %cond1 = icmp|fcmp|binary instruction ...
8752 // %cond2 = icmp|fcmp|binary instruction ...
8753 // %cond.or = or|and i1 %cond1, cond2
8754    // br i1 %cond.or label %dest1, label %dest2
8755 Instruction *LogicOp;
8756 BasicBlock *TBB, *FBB;
8757 if (!match(BB.getTerminator(),
8758 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
8759 continue;
8760
8761 auto *Br1 = cast<BranchInst>(BB.getTerminator());
8762 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
8763 continue;
8764
8765    // The merging of mostly empty BBs can cause a degenerate branch.
8766 if (TBB == FBB)
8767 continue;
8768
8769 unsigned Opc;
8770 Value *Cond1, *Cond2;
8771 if (match(LogicOp,
8772 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
8773 Opc = Instruction::And;
8774 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
8775 m_OneUse(m_Value(Cond2)))))
8776 Opc = Instruction::Or;
8777 else
8778 continue;
8779
8780 auto IsGoodCond = [](Value *Cond) {
8781 return match(
8782 Cond,
8783          m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
8784                                           m_LogicalOr(m_Value(), m_Value()))));
8785 };
8786 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
8787 continue;
8788
8789 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
8790
8791 // Create a new BB.
8792 auto *TmpBB =
8793 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
8794 BB.getParent(), BB.getNextNode());
8795 if (IsHugeFunc)
8796 FreshBBs.insert(TmpBB);
8797
8798 // Update original basic block by using the first condition directly by the
8799 // branch instruction and removing the no longer needed and/or instruction.
8800 Br1->setCondition(Cond1);
8801 LogicOp->eraseFromParent();
8802
8803 // Depending on the condition we have to either replace the true or the
8804 // false successor of the original branch instruction.
8805 if (Opc == Instruction::And)
8806 Br1->setSuccessor(0, TmpBB);
8807 else
8808 Br1->setSuccessor(1, TmpBB);
8809
8810 // Fill in the new basic block.
8811 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
8812 if (auto *I = dyn_cast<Instruction>(Cond2)) {
8813 I->removeFromParent();
8814 I->insertBefore(Br2);
8815 }
8816
8817 // Update PHI nodes in both successors. The original BB needs to be
8818    // replaced in one successor's PHI nodes, because the branch now comes from
8819    // the newly generated BB (TmpBB). In the other successor we need to add one
8820    // incoming edge to the PHI nodes, because both branch instructions now
8821    // target the same successor.
8822 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
8823 // we perform the correct update for the PHI nodes.
8824 // This doesn't change the successor order of the just created branch
8825 // instruction (or any other instruction).
8826 if (Opc == Instruction::Or)
8827 std::swap(TBB, FBB);
8828
8829 // Replace the old BB with the new BB.
8830 TBB->replacePhiUsesWith(&BB, TmpBB);
8831
8832 // Add another incoming edge from the new BB.
8833 for (PHINode &PN : FBB->phis()) {
8834 auto *Val = PN.getIncomingValueForBlock(&BB);
8835 PN.addIncoming(Val, TmpBB);
8836 }
8837
8838 // Update the branch weights (from SelectionDAGBuilder::
8839 // FindMergedConditions).
8840 if (Opc == Instruction::Or) {
8841 // Codegen X | Y as:
8842 // BB1:
8843 // jmp_if_X TBB
8844 // jmp TmpBB
8845 // TmpBB:
8846 // jmp_if_Y TBB
8847 // jmp FBB
8848 //
8849
8850 // We have flexibility in setting Prob for BB1 and Prob for NewBB.
8851 // The requirement is that
8852 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
8853 // = TrueProb for original BB.
8854 // Assuming the original weights are A and B, one choice is to set BB1's
8855 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
8856 // assumes that
8857 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
8858 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
8859 // TmpBB, but the math is more complicated.
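      // For illustration, with hypothetical original weights A = 3 (true) and
      // B = 1 (false): BB1 gets weights 3 and 3 + 2*1 = 5, TmpBB gets 3 and 2,
      // and indeed 3/8 + (5/8)*(3/5) = 3/4 matches the original true
      // probability of A/(A+B) = 3/4.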
8860 uint64_t TrueWeight, FalseWeight;
8861 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
8862 uint64_t NewTrueWeight = TrueWeight;
8863 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
8864 scaleWeights(NewTrueWeight, NewFalseWeight);
8865 Br1->setMetadata(LLVMContext::MD_prof,
8866 MDBuilder(Br1->getContext())
8867                           .createBranchWeights(NewTrueWeight, NewFalseWeight));
8868
8869 NewTrueWeight = TrueWeight;
8870 NewFalseWeight = 2 * FalseWeight;
8871 scaleWeights(NewTrueWeight, NewFalseWeight);
8872 Br2->setMetadata(LLVMContext::MD_prof,
8873 MDBuilder(Br2->getContext())
8874                           .createBranchWeights(NewTrueWeight, NewFalseWeight));
8875 }
8876 } else {
8877 // Codegen X & Y as:
8878 // BB1:
8879 // jmp_if_X TmpBB
8880 // jmp FBB
8881 // TmpBB:
8882 // jmp_if_Y TBB
8883 // jmp FBB
8884 //
8885 // This requires creation of TmpBB after CurBB.
8886
8887 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
8888 // The requirement is that
8889 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
8890 // = FalseProb for original BB.
8891 // Assuming the original weights are A and B, one choice is to set BB1's
8892 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
8893 // assumes that
8894 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
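      // For illustration, with hypothetical original weights A = 1 (true) and
      // B = 3 (false): BB1 gets weights 2*1 + 3 = 5 and 3, TmpBB gets 2 and 3,
      // and 3/8 + (5/8)*(3/5) = 3/4 matches the original false probability of
      // B/(A+B) = 3/4.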
8895 uint64_t TrueWeight, FalseWeight;
8896 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
8897 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
8898 uint64_t NewFalseWeight = FalseWeight;
8899 scaleWeights(NewTrueWeight, NewFalseWeight);
8900 Br1->setMetadata(LLVMContext::MD_prof,
8901 MDBuilder(Br1->getContext())
8902                           .createBranchWeights(NewTrueWeight, NewFalseWeight));
8903
8904 NewTrueWeight = 2 * TrueWeight;
8905 NewFalseWeight = FalseWeight;
8906 scaleWeights(NewTrueWeight, NewFalseWeight);
8907 Br2->setMetadata(LLVMContext::MD_prof,
8908 MDBuilder(Br2->getContext())
8909                           .createBranchWeights(NewTrueWeight, NewFalseWeight));
8910 }
8911 }
8912
8913 ModifiedDT = ModifyDT::ModifyBBDT;
8914 MadeChange = true;
8915
8916 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
8917 TmpBB->dump());
8918 }
8919 return MadeChange;
8920}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static unsigned getIntrinsicID(const SDNode *N)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
Optimize for code generation
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static void replaceAllUsesWith(Value *Old, Value *New, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, DenseMap< GCRelocateInst *, SmallVector< GCRelocateInst *, 2 > > &RelocateInstMap)
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI)
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinkinig and/cmp into branches."))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
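The cl::opt declarations listed here are ordinary command-line knobs. A minimal sketch of the same declaration pattern, using a hypothetical flag name rather than one of the pass's real options:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical flag (not part of this pass), declared in the same style as
// the options above.
static cl::opt<bool> ExampleDisableFoo(
    "example-disable-foo", cl::Hidden, cl::init(false),
    cl::desc("Disable the (hypothetical) foo transformation."));

static bool shouldRunFoo() {
  // A cl::opt<bool> converts implicitly to its stored value once the
  // command line has been parsed.
  return !ExampleDisableFoo;
}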
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
#define DEBUG_TYPE
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:203
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:148
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub-type a function will return at a given Idx; it should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
Hexagon Common GEP
IRTranslator LLVM IR MI
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1497
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bit signed integer value.
Definition: APInt.h:413
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned logBase2() const
Definition: APInt.h:1703
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
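A minimal sketch exercising a few of the APInt queries documented above; the helper name and sample values are illustrative, not taken from the pass:

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Illustrative helper combining isSignedIntN and isPowerOf2.
static bool fitsSignedAndIsPow2(const APInt &V, unsigned N) {
  // isSignedIntN: does V fit in an N-bit signed integer?
  // isPowerOf2:   is V a power of two greater than zero?
  return V.isSignedIntN(N) && V.isPowerOf2();
}

// Example: APInt X(/*numBits=*/32, /*val=*/8);
//          fitsSignedAndIsPow2(X.zext(64), 16) == true   (8 fits in i16, 8 == 2^3)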
an instruction to allocate memory on the stack
Definition: Instructions.h:59
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:132
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:125
void setAlignment(Align Align)
Definition: Instructions.h:136
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:492
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
static unsigned getPointerOperandIndex()
Definition: Instructions.h:675
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
static unsigned getPointerOperandIndex()
Definition: Instructions.h:912
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:443
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:640
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:360
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:199
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:570
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:482
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:276
void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:379
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:65
void reinsertInstInDbgRecords(Instruction *I, std::optional< DbgRecord::self_iterator > Pos)
In rare circumstances instructions can be speculatively removed from blocks, and then be re-inserted ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
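A small sketch of the splitBasicBlock and getSinglePredecessor entries above, wrapped in a hypothetical helper; block names are illustrative:

#include <cassert>
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Split BB at instruction I: I and everything after it move to a new block,
// and BB is terminated with an unconditional branch to it.
static BasicBlock *splitAt(BasicBlock *BB, Instruction *I) {
  BasicBlock *Tail = BB->splitBasicBlock(I->getIterator(), "tail");
  // After the split, the new block has BB as its only predecessor.
  assert(Tail->getSinglePredecessor() == BB && "unexpected CFG shape");
  return Tail;
}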
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:513
This class represents a no-op cast from one type to another.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1809
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition: InstrTypes.h:1828
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1687
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1692
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1678
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:601
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:983
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
static CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Base class for constants with no operands.
Definition: Constants.h:52
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2140
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:205
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:160
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1449
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
This represents the llvm.dbg.value instruction.
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue)
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
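A sketch of the DenseMap find/insert pattern documented above; the key and value types and the helper name are assumptions for illustration, not taken from the pass:

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Illustrative per-instruction cache.
static unsigned lookupOrInsert(DenseMap<Instruction *, unsigned> &Cache,
                               Instruction *I, unsigned Default) {
  auto It = Cache.find(I);
  if (It != Cache.end())
    return It->second;          // hit: reuse the cached value
  Cache.insert({I, Default});   // miss: remember the default
  return Default;
}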
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
Definition: Function.h:787
const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
Represents a gc.statepoint intrinsic call.
Definition: Statepoint.h:61
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition: Globals.cpp:128
bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition: Globals.cpp:295
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
Type * getValueType() const
Definition: GlobalValue.h:296
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2137
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:466
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * createIsFPClass(Value *FPNum, unsigned Test)
Definition: IRBuilder.cpp:1309
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2127
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1826
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:502
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
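A minimal IRBuilder sketch combining CreateICmpEQ and CreateCondBr from the entries above; the helper and block names are hypothetical, and the target block is assumed to have no terminator yet:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emit "icmp eq X, 0" and a conditional branch on it.
static void emitZeroTest(BasicBlock *InsertAtEnd, Value *X,
                         BasicBlock *TrueBB, BasicBlock *FalseBB) {
  IRBuilder<> Builder(InsertAtEnd);   // insertion point: end of InsertAtEnd
  Value *IsZero = Builder.CreateICmpEQ(
      X, Constant::getNullValue(X->getType()), "is.zero");
  Builder.CreateCondBr(IsZero, TrueBB, FalseBB);
}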
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:91
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:812
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:149
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
bool isShift() const
Definition: Instruction.h:259
std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
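A sketch of attaching branch-weight metadata with MDBuilder::createBranchWeights as documented above; the 1000:1 ratio and the helper name are assumptions for illustration:

#include <cassert>
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

// Attach "prof" branch-weight metadata to a conditional branch.
static void markLikelyTaken(BranchInst *BI) {
  assert(BI->isConditional() && "weights only make sense on cond branches");
  MDBuilder MDB(BI->getContext());
  BI->setMetadata(LLVMContext::MD_prof,
                  MDB.createBranchWeights(/*TrueWeight=*/1000,
                                          /*FalseWeight=*/1));
}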
Machine Value Type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
iterator find(const KeyT &Key)
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:79
void clear()
Definition: MapVector.h:88
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memcpy/memmove intrinsics.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:756
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
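A minimal sketch of PHINode::Create and addIncoming from the entries above; the helper name is hypothetical and both incoming values are assumed to share a type:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Build a two-input phi at the top of MergeBB, merging V1 from BB1 and V2
// from BB2.
static PHINode *mergeValues(BasicBlock *MergeBB, Value *V1, BasicBlock *BB1,
                            Value *V2, BasicBlock *BB2) {
  PHINode *PN = PHINode::Create(V1->getType(), /*NumReservedValues=*/2,
                                "merge", MergeBB->begin());
  PN->addIncoming(V1, BB1);
  PN->addIncoming(V2, BB2);
  return PN;
}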
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
PointerIntPair - This class implements a pair of a pointer and small integer.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
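A sketch of the SmallPtrSet::insert idiom documented above; the helper name and inline size are assumptions:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Typical "visited" set: insert() returns {iterator, bool}, and the bool is
// true only the first time a pointer is inserted.
static bool markVisited(SmallPtrSet<const Instruction *, 16> &Visited,
                        const Instruction *I) {
  return Visited.insert(I).second;
}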
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool erase(const T &V)
Definition: SmallSet.h:207
void clear()
Definition: SmallSet.h:218
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition: SmallSet.h:236
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
static unsigned getPointerOperandIndex()
Definition: Instructions.h:419
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:651
Class to represent struct types.
Definition: DerivedTypes.h:216
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
virtual bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
virtual bool getAddrModeArguments(IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:243
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
'undef' values are things that do not have specified contents.
Definition: Constants.h:1348
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
See the file comment.
Definition: ValueMap.h:84
void clear()
Definition: ValueMap.h:145
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:736
user_iterator user_begin()
Definition: Value.h:397
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
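A sketch of the replaceAllUsesWith idiom documented above, paired with Instruction::eraseFromParent; the helper name is hypothetical:

#include "llvm/IR/Instruction.h"
using namespace llvm;

// The usual replace-and-erase idiom: redirect every use of Old to New, then
// delete the now-unused instruction.
static void replaceAndErase(Instruction *Old, Value *New) {
  Old->replaceAllUsesWith(New);
  Old->eraseFromParent();   // Old has no remaining users, safe to delete
}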
iterator_range< user_iterator > users()
Definition: Value.h:421
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition: Value.cpp:234
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition: Value.cpp:153
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
bool use_empty() const
Definition: Value.h:344
user_iterator user_end()
Definition: Value.h:405
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
iterator_range< use_iterator > uses()
Definition: Value.h:376
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:815
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5239
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
This class represents zero extension of integer types.
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr bool isNonZero() const
Definition: TypeSize.h:158
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
TypeSize getSequentialElementStride(const DataLayout &DL) const
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:485
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:933
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:777
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:836
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:553
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:560
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
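A minimal sketch of the PatternMatch combinators listed above (match, m_Add, m_Value, m_ConstantInt); the helper name is hypothetical:

#include "llvm/IR/Constants.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognize "add X, C" with a constant-int right-hand side; X and C are
// captured on a successful match.
static bool matchAddOfConstant(Value *V, Value *&X, ConstantInt *&C) {
  return match(V, m_Add(m_Value(X), m_ConstantInt(C)));
}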
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition: ScaledNumber.h:252
ManagedStatic< cl::opt< FnT >, OptCreatorT > Action
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:456
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:130
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2165
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1650
auto successors(const MachineBasicBlock *BB)
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:141
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:241
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2059
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition: Local.cpp:3973
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
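Debug output is normally wrapped in LLVM_DEBUG so it is only emitted in assertion-enabled builds under -debug or -debug-only=<DEBUG_TYPE>; a sketch with a hypothetical helper:
#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "codegenprepare"
static void traceBlock(const llvm::BasicBlock &BB) {
  // Printed only when run with -debug or -debug-only=codegenprepare.
  LLVM_DEBUG(llvm::dbgs() << "CGP: visiting block " << BB.getName() << "\n");
}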
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition: Analysis.cpp:199
bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:50
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
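A sketch of querying non-zero-ness of a pointer at a particular program point; the helper and its parameters are assumptions for illustration:
#include "llvm/Analysis/SimplifyQuery.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
// True if Ptr is provably non-null at the point where CtxI executes.
static bool pointerIsNonNullAt(const llvm::Value *Ptr,
                               const llvm::Instruction *CtxI) {
  const llvm::DataLayout &DL = CtxI->getModule()->getDataLayout();
  return llvm::isKnownNonZero(Ptr, llvm::SimplifyQuery(DL, CtxI));
}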
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test whether, given that the input instruction is in the tail call position, there is an attribute misma...
Definition: Analysis.cpp:581
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
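A sketch of the map that drives this transform: keys are the slow divide bit widths, values the narrower width to try first. The 64-to-32 choice below is an assumption; in a real pass the widths come from the target:
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
static bool bypass64BitDivides(llvm::BasicBlock &BB) {
  llvm::DenseMap<unsigned, unsigned> BypassWidths;
  BypassWidths[64] = 32; // try an i32 divide first, fall back to the i64 one
  return llvm::bypassSlowDivision(&BB, BypassWidths);
}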
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
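A sketch of pruning a worklist in place (the worklist type and helper name are illustrative):
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
// Drop entries whose instruction has already been erased (set to null).
static void pruneWorklist(llvm::SmallVectorImpl<llvm::Instruction *> &Worklist) {
  llvm::erase_if(Worklist, [](llvm::Instruction *I) { return I == nullptr; });
}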
std::pair< Value *, FPClassTest > fcmpToClassTest(CmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
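A small worked example, assuming a 16-byte aligned base address accessed at byte offset 4:
#include "llvm/Support/Alignment.h"
// A 16-byte aligned base accessed at offset 4 is only 4-byte aligned.
llvm::Align Base(16);
llvm::Align AtOffset = llvm::commonAlignment(Base, 4); // == llvm::Align(4)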
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
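A sketch of guarding newly emitted code on a condition; the helper, its parameters, and the store emitted afterwards are assumptions about what a typical caller would do:
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
// Split the block at InsertPt and wrap new code in "if (Cond) { ... }".
static void emitGuardedStore(llvm::Value *Cond, llvm::Instruction *InsertPt,
                             llvm::Value *Val, llvm::Value *Ptr) {
  llvm::Instruction *ThenTerm = llvm::SplitBlockAndInsertIfThen(
      Cond, InsertPt->getIterator(), /*Unreachable=*/false);
  llvm::IRBuilder<> Builder(ThenTerm);
  // The store only executes when Cond is true.
  Builder.CreateStore(Val, Ptr);
}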
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:290
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:628
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
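A sketch of how these EVT queries combine when comparing two IR types; the helper name is illustrative, and it assumes both types are first-class scalar or vector types so EVT::getEVT can classify them:
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"
// True if Src is a strictly narrower integer type than Dst, e.g. i8 vs. i32.
static bool isNarrowerInteger(llvm::Type *Src, llvm::Type *Dst) {
  llvm::EVT SrcVT = llvm::EVT::getEVT(Src);
  llvm::EVT DstVT = llvm::EVT::getEVT(Dst);
  return SrcVT.isInteger() && DstVT.isInteger() && SrcVT.bitsLT(DstVT);
}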
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
ExtAddrMode()=default
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
const DataLayout & DL
Definition: SimplifyQuery.h:61
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.