llvm.org GIT mirror llvm / f765312
[GlobalISel][Localizer] Rewrite localizer to run in 2 phases, inter & intra block. Inter-block localization is the same as what currently happens, except now it only runs on the entry block because that's where the problematic constants with long live ranges come from. The second phase is a new intra-block localization phase which attempts to re-sink the already localized instructions further, right before one of the multiple uses. One additional change is to also localize G_GLOBAL_VALUE as they're constants too. However, on some targets like arm64 it takes multiple instructions to materialize the value, so some additional heuristics with a TTI hook have been introduced to attempt to prevent code size regressions when localizing these. Overall, these changes improve CTMark code size on arm64 by 1.2%. Full code size results: Program baseline new diff ------------------------------------------------------------------------------ test-suite...-typeset/consumer-typeset.test 1249984 1217216 -2.6% test-suite...:: CTMark/ClamAV/clamscan.test 1264928 1232152 -2.6% test-suite :: CTMark/SPASS/SPASS.test 1394092 1361316 -2.4% test-suite...Mark/mafft/pairlocalalign.test 731320 714928 -2.2% test-suite :: CTMark/lencod/lencod.test 1340592 1324200 -1.2% test-suite :: CTMark/kimwitu++/kc.test 3853512 3820420 -0.9% test-suite :: CTMark/Bullet/bullet.test 3406036 3389652 -0.5% test-suite...ark/tramp3d-v4/tramp3d-v4.test 8017000 8016992 -0.0% test-suite...TMark/7zip/7zip-benchmark.test 2856588 2856588 0.0% test-suite...:: CTMark/sqlite3/sqlite3.test 765704 765704 0.0% Geomean difference -1.2% Differential Revision: https://reviews.llvm.org/D63303 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363632 91177308-0d34-0410-b5e6-96231b3b80d8 Amara Emerson a month ago
8 changed file(s) with 343 addition(s) and 64 deletion(s). Raw diff Collapse all Expand all
10521052 /// \returns True if the target wants to expand the given reduction intrinsic
10531053 /// into a shuffle sequence.
10541054 bool shouldExpandReduction(const IntrinsicInst *II) const;
1055
1056 /// \returns the size cost of rematerializing a GlobalValue address relative
1057 /// to a stack reload.
1058 unsigned getGISelRematGlobalCost() const;
1059
10551060 /// @}
10561061
10571062 private:
12681273 virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
12691274 ReductionFlags) const = 0;
12701275 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1276 virtual unsigned getGISelRematGlobalCost() const = 0;
12711277 virtual int getInstructionLatency(const Instruction *I) = 0;
12721278 };
12731279
/// Delegates to Impl.shouldExpandReduction(II) and returns its answer
/// unchanged.
17001706 bool shouldExpandReduction(const IntrinsicInst *II) const override {
17011707 return Impl.shouldExpandReduction(II);
17021708 }
1709
/// Delegates to Impl.getGISelRematGlobalCost() and returns its value
/// unchanged.
1710 unsigned getGISelRematGlobalCost() const override {
1711 return Impl.getGISelRematGlobalCost();
1712 }
1713
/// Delegates to Impl.getInstructionLatency(I) and returns its value
/// unchanged.
17031714 int getInstructionLatency(const Instruction *I) override {
17041715 return Impl.getInstructionLatency(I);
17051716 }
569569
/// Always answers true, regardless of \p II.
// NOTE(review): presumably the target-independent default that targets
// override -- confirm against the enclosing class, which is not visible here.
570570 bool shouldExpandReduction(const IntrinsicInst *II) const {
571571 return true;
572 }
573
/// Reports a rematerialization cost of 1 for a GlobalValue address. The
/// Localizer's shouldLocalize() treats a cost of 1 as "remats are basically
/// free" and then localizes G_GLOBAL_VALUE unconditionally.
// NOTE(review): presumably the target-independent default -- confirm against
// the enclosing class, which is not visible here.
574 unsigned getGISelRematGlobalCost() const {
575 return 1;
572576 }
573577
574578 protected:
2626 namespace llvm {
2727 // Forward declarations.
2828 class MachineRegisterInfo;
29 class TargetTransformInfo;
2930
3031 /// This pass implements the localization mechanism described at the
3132 /// top of this file. One specificity of the implementation is that
4243 /// MRI contains all the register class/bank information that this
4344 /// pass uses and updates.
4445 MachineRegisterInfo *MRI;
46 /// TTI used for getting remat costs for instructions.
47 TargetTransformInfo *TTI;
4548
4649 /// Check whether or not \p MI needs to be moved close to its uses.
47 static bool shouldLocalize(const MachineInstr &MI);
50 bool shouldLocalize(const MachineInstr &MI);
4851
4952 /// Check if \p MOUse is used in the same basic block as \p Def.
5053 /// If the use is in the same block, we say it is local.
5558
5659 /// Initialize the field members using \p MF.
5760 void init(MachineFunction &MF);
61
62 /// Do inter-block localization from the entry block.
63 bool localizeInterBlock(MachineFunction &MF,
64 SmallPtrSetImpl &LocalizedInstrs);
65
66 /// Do intra-block localization of already localized instructions.
67 bool localizeIntraBlock(SmallPtrSetImpl &LocalizedInstrs);
5868
5969 public:
6070 Localizer();
721721
/// Forwards the query to TTIImpl and returns its answer unchanged.
722722 bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
723723 return TTIImpl->shouldExpandReduction(II);
724 }
725
/// Forwards the query to TTIImpl and returns its value unchanged.
726 unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
727 return TTIImpl->getGISelRematGlobalCost();
724728 }
725729
726730 int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
99 //===----------------------------------------------------------------------===//
1010
1111 #include "llvm/CodeGen/GlobalISel/Localizer.h"
12 #include "llvm/Analysis/TargetTransformInfo.h"
1213 #include "llvm/ADT/DenseMap.h"
1314 #include "llvm/ADT/SmallPtrSet.h"
1415 #include "llvm/CodeGen/MachineRegisterInfo.h"
1920 using namespace llvm;
2021
2122 char Localizer::ID = 0;
22 INITIALIZE_PASS(Localizer, DEBUG_TYPE,
23 "Move/duplicate certain instructions close to their use", false,
24 false)
23 INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
24 "Move/duplicate certain instructions close to their use",
25 false, false)
26 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
27 INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
28 "Move/duplicate certain instructions close to their use",
29 false, false)
2530
/// Constructs the pass with the static pass ID and runs
/// initializeLocalizerPass() on the registry returned by
/// PassRegistry::getPassRegistry().
2631 Localizer::Localizer() : MachineFunctionPass(ID) {
2732 initializeLocalizerPass(*PassRegistry::getPassRegistry());
2833 }
2934
30 void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
35 void Localizer::init(MachineFunction &MF) {
36 MRI = &MF.getRegInfo();
37 TTI = &getAnalysis().getTTI(MF.getFunction());
38 }
3139
/// Decide from the opcode of \p MI whether it should be moved close to its
/// uses. Opcodes not listed in the switch are rejected; G_GLOBAL_VALUE is
/// accepted only while its number of non-debug users stays within a limit
/// derived from the target's rematerialization cost.
3240 bool Localizer::shouldLocalize(const MachineInstr &MI) {
41 // Assuming a spill and reload of a value has a cost of 1 instruction each,
42 // this helper function computes the maximum number of uses we should consider
43 // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
44 // break even in terms of code size when the original MI has 2 users vs
45 // choosing to potentially spill. With any more than 2 users we have a net code
46 // size increase. This doesn't take into account register pressure though.
// Note that a cost of 0 matches none of the branches below and ends up in
// llvm_unreachable.
47 auto maxUses = [](unsigned RematCost) {
48 // A cost of 1 means remats are basically free.
49 if (RematCost == 1)
50 return UINT_MAX;
51 if (RematCost == 2)
52 return 2U;
53
54 // Remat is too expensive, only sink if there's one user.
55 if (RematCost > 2)
56 return 1U;
57 llvm_unreachable("Unexpected remat cost");
58 };
59
60 // Helper to walk through uses and terminate if we've reached a limit. Saves
61 // us spending time traversing uses if all we want to know is whether the
// number of non-debug using instructions exceeds MaxUses.
62 auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
63 unsigned NumUses = 0;
64 auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
65 for (; UI != UE && NumUses < MaxUses; ++UI) {
66 NumUses++;
67 }
68 // If we haven't reached the end yet then there are more than MaxUses users.
69 return UI == UE;
70 };
71
3372 switch (MI.getOpcode()) {
3473 default:
3574 return false;
3978 case TargetOpcode::G_FCONSTANT:
4079 case TargetOpcode::G_FRAME_INDEX:
4180 return true;
81 case TargetOpcode::G_GLOBAL_VALUE: {
// Globals may take several instructions to materialize, so ask the target
// for the cost and only localize while the user count stays within the
// limit computed by maxUses().
82 unsigned RematCost = TTI->getGISelRematGlobalCost();
83 unsigned Reg = MI.getOperand(0).getReg();
84 unsigned MaxUses = maxUses(RematCost);
85 if (MaxUses == UINT_MAX)
86 return true; // Remats are "free" so always localize.
87 bool B = isUsesAtMost(Reg, MaxUses);
88 return B;
89 }
4290 }
4391 }
4492
4593 void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
94 AU.addRequired();
4695 getSelectionDAGFallbackAnalysisUsage(AU);
4796 MachineFunctionPass::getAnalysisUsage(AU);
4897 }
56105 return InsertMBB == Def.getParent();
57106 }
58107
108 bool Localizer::localizeInterBlock(
109 MachineFunction &MF, SmallPtrSetImpl &LocalizedInstrs) {
110 bool Changed = false;
111 DenseMap, unsigned> MBBWithLocalDef;
112
113 // Since the IRTranslator only emits constants into the entry block, and the
114 // rest of the GISel pipeline generally emits constants close to their users,
115 // we only localize instructions in the entry block here. This might change if
116 // we start doing CSE across blocks.
117 auto &MBB = MF.front();
118 for (MachineInstr &MI : MBB) {
119 if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
120 continue;
121 LLVM_DEBUG(dbgs() << "Should localize: " << MI);
122 assert(MI.getDesc().getNumDefs() == 1 &&
123 "More than one definition not supported yet");
124 unsigned Reg = MI.getOperand(0).getReg();
125 // Check if all the users of MI are local.
126 // We are going to invalidation the list of use operands, so we
127 // can't use range iterator.
128 for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
129 MOIt != MOItEnd;) {
130 MachineOperand &MOUse = *MOIt++;
131 // Check if the use is already local.
132 MachineBasicBlock *InsertMBB;
133 LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
134 dbgs() << "Checking use: " << MIUse
135 << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
136 if (isLocalUse(MOUse, MI, InsertMBB))
137 continue;
138 LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
139 Changed = true;
140 auto MBBAndReg = std::make_pair(InsertMBB, Reg);
141 auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
142 if (NewVRegIt == MBBWithLocalDef.end()) {
143 // Create the localized instruction.
144 MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
145 LocalizedInstrs.insert(LocalizedMI);
146 MachineInstr &UseMI = *MOUse.getParent();
147 if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
148 InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
149 else
150 InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
151 LocalizedMI);
152
153 // Set a new register for the definition.
154 unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
155 MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
156 LocalizedMI->getOperand(0).setReg(NewReg);
157 NewVRegIt =
158 MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
159 LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
160 }
161 LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
162 << '\n');
163 // Update the user reg.
164 MOUse.setReg(NewVRegIt->second);
165 }
166 }
167 return Changed;
168 }
169
170 bool Localizer::localizeIntraBlock(
171 SmallPtrSetImpl &LocalizedInstrs) {
172 bool Changed = false;
173
174 // For each already-localized instruction which has multiple users, then we
175 // scan the block top down from the current position until we hit one of them.
176
177 // FIXME: Consider doing inst duplication if live ranges are very long due to
178 // many users, but this case may be better served by regalloc improvements.
179
180 for (MachineInstr *MI : LocalizedInstrs) {
181 unsigned Reg = MI->getOperand(0).getReg();
182 MachineBasicBlock &MBB = *MI->getParent();
183 // If the instruction has a single use, we would have already moved it right
184 // before its user in localizeInterBlock().
185 if (MRI->hasOneUse(Reg))
186 continue;
187
188 // All of the user MIs of this reg.
189 SmallPtrSet Users;
190 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg))
191 Users.insert(&UseMI);
192
193 MachineBasicBlock::iterator II(MI);
194 ++II;
195 while (II != MBB.end() && !Users.count(&*II))
196 ++II;
197
198 LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II
199 << "\n");
200 assert(II != MBB.end() && "Didn't find the user in the MBB");
201 MI->removeFromParent();
202 MBB.insert(II, MI);
203 Changed = true;
204 }
205 return Changed;
206 }
207
59208 bool Localizer::runOnMachineFunction(MachineFunction &MF) {
60209 // If the ISel pipeline failed, do not bother running that pass.
61210 if (MF.getProperties().hasProperty(
66215
67216 init(MF);
68217
69 bool Changed = false;
70 // Keep track of the instructions we localized.
71 // We won't need to process them if we see them later in the CFG.
72 SmallPtrSet LocalizedInstrs;
73 DenseMap, unsigned> MBBWithLocalDef;
74 // TODO: Do bottom up traversal.
75 for (MachineBasicBlock &MBB : MF) {
76 for (MachineInstr &MI : MBB) {
77 if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
78 continue;
79 LLVM_DEBUG(dbgs() << "Should localize: " << MI);
80 assert(MI.getDesc().getNumDefs() == 1 &&
81 "More than one definition not supported yet");
82 unsigned Reg = MI.getOperand(0).getReg();
83 // Check if all the users of MI are local.
84 // We are going to invalidation the list of use operands, so we
85 // can't use range iterator.
86 for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
87 MOIt != MOItEnd;) {
88 MachineOperand &MOUse = *MOIt++;
89 // Check if the use is already local.
90 MachineBasicBlock *InsertMBB;
91 LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
92 dbgs() << "Checking use: " << MIUse
93 << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
94 if (isLocalUse(MOUse, MI, InsertMBB))
95 continue;
96 LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
97 Changed = true;
98 auto MBBAndReg = std::make_pair(InsertMBB, Reg);
99 auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
100 if (NewVRegIt == MBBWithLocalDef.end()) {
101 // Create the localized instruction.
102 MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
103 LocalizedInstrs.insert(LocalizedMI);
104 // Don't try to be smart for the insertion point.
105 // There is no guarantee that the first seen use is the first
106 // use in the block.
107 InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
108 LocalizedMI);
109
110 // Set a new register for the definition.
111 unsigned NewReg =
112 MRI->createGenericVirtualRegister(MRI->getType(Reg));
113 MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
114 LocalizedMI->getOperand(0).setReg(NewReg);
115 NewVRegIt =
116 MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
117 LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
118 }
119 LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
120 << '\n');
121 // Update the user reg.
122 MOUse.setReg(NewVRegIt->second);
123 }
124 }
125 }
126 return Changed;
127 }
218 // Keep track of the instructions we localized. We'll do a second pass of
219 // intra-block localization to further reduce live ranges.
220 SmallPtrSet LocalizedInstrs;
221
222 bool Changed = localizeInterBlock(MF, LocalizedInstrs);
223 return Changed |= localizeIntraBlock(LocalizedInstrs);
224 }
164164 return false;
165165 }
166166
/// Reports a rematerialization cost of 2 for a GlobalValue address. With a
/// cost of 2 the Localizer's shouldLocalize() only localizes a G_GLOBAL_VALUE
/// that has at most 2 non-debug users.
// NOTE(review): presumably the arm64 override (the Localizer comment cites
// arm64 needing 2 instructions to materialize a global address) -- confirm
// against the enclosing class, which is not visible here.
167 unsigned getGISelRematGlobalCost() const {
168 return 2;
169 }
170
167171 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
168172 TTI::ReductionFlags Flags) const;
169173
0 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
1 ; RUN: llc -o - -verify-machineinstrs -O0 -global-isel -stop-after=localizer %s | FileCheck %s
2 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
3 target triple = "arm64-apple-ios5.0.0"
4
5 @var1 = common global i32 0, align 4
6 @var2 = common global i32 0, align 4
7 @var3 = common global i32 0, align 4
8 @var4 = common global i32 0, align 4
9
10 ; This is an ll test instead of MIR because -run-pass doesn't seem to support
11 ; initializing the target TTI which we need for this test.
12
13 ; Some of the instructions in entry block are dead after this pass so don't
14 ; strictly need to be checked for.
15
16 define i32 @foo() {
17 ; CHECK-LABEL: name: foo
18 ; CHECK: bb.1.entry:
19 ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
20 ; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
21 ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
22 ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
23 ; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
24 ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
25 ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
26 ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
27 ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
28 ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
29 ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
30 ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2
31 ; CHECK: G_BR %bb.3
32 ; CHECK: bb.2.if.then:
33 ; CHECK: successors: %bb.3(0x80000000)
34 ; CHECK: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
35 ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
36 ; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2)
37 ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
38 ; CHECK: G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1)
39 ; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
40 ; CHECK: G_STORE [[C4]](s32), [[GV4]](p0) :: (store 4 into @var3)
41 ; CHECK: G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1)
42 ; CHECK: bb.3.if.end:
43 ; CHECK: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
44 ; CHECK: $w0 = COPY [[C6]](s32)
45 ; CHECK: RET_ReallyLR implicit $w0
46 entry:
47 %0 = load i32, i32* @var1, align 4
48 %cmp = icmp eq i32 %0, 1
49 br i1 %cmp, label %if.then, label %if.end
50
51 if.then:
52 store i32 2, i32* @var2, align 4
53 store i32 3, i32* @var1, align 4
54 store i32 2, i32* @var3, align 4
55 store i32 3, i32* @var1, align 4
56 br label %if.end
57
58 if.end:
59 ret i32 0
60 }
61
1414 define void @float_non_local_phi_use_followed_by_use_fi() { ret void }
1515 define void @non_local_phi() { ret void }
1616 define void @non_local_label() { ret void }
17
18 @var1 = common global i32 0, align 4
19 @var2 = common global i32 0, align 4
20 @var3 = common global i32 0, align 4
21 @var4 = common global i32 0, align 4
22
23 define i32 @intrablock_with_globalvalue() {
24 entry:
25 %0 = load i32, i32* @var1, align 4
26 %cmp = icmp eq i32 %0, 1
27 br i1 %cmp, label %if.then, label %if.end
28
29 if.then:
30 store i32 2, i32* @var2, align 4
31 store i32 3, i32* @var1, align 4
32 store i32 2, i32* @var3, align 4
33 store i32 3, i32* @var1, align 4
34 br label %if.end
35
36 if.end:
37 ret i32 0
38 }
39
1740 ...
1841
1942 ---
300323 %2:fpr(s32) = G_FADD %0, %1
301324 G_BR %bb.1
302325 ...
326 ---
327 name: intrablock_with_globalvalue
328 legalized: true
329 regBankSelected: true
330 tracksRegLiveness: true
331 body: |
332 ; CHECK-LABEL: name: intrablock_with_globalvalue
333 ; CHECK: bb.0.entry:
334 ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
335 ; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
336 ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
337 ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
338 ; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
339 ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
340 ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
341 ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
342 ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
343 ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
344 ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
345 ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1
346 ; CHECK: G_BR %bb.2
347 ; CHECK: bb.1.if.then:
348 ; CHECK: successors: %bb.2(0x80000000)
349 ; CHECK: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
350 ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
351 ; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2)
352 ; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
353 ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
354 ; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1)
355 ; CHECK: [[GV5:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
356 ; CHECK: G_STORE [[C4]](s32), [[GV5]](p0) :: (store 4 into @var3)
357 ; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1)
358 ; CHECK: bb.2.if.end:
359 ; CHECK: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
360 ; CHECK: $w0 = COPY [[C6]](s32)
361 ; CHECK: RET_ReallyLR implicit $w0
362
363 ; Some of these instructions are dead. We're checking that the other instructions are
364 ; sunk immediately before their first user in the if.then block or as close as possible.
365 bb.1.entry:
366 %1:gpr(p0) = G_GLOBAL_VALUE @var1
367 %2:gpr(s32) = G_CONSTANT i32 1
368 %4:gpr(s32) = G_CONSTANT i32 2
369 %5:gpr(p0) = G_GLOBAL_VALUE @var2
370 %6:gpr(s32) = G_CONSTANT i32 3
371 %7:gpr(p0) = G_GLOBAL_VALUE @var3
372 %8:gpr(s32) = G_CONSTANT i32 0
373 %0:gpr(s32) = G_LOAD %1(p0) :: (load 4 from @var1)
374 %9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
375 %3:gpr(s1) = G_TRUNC %9(s32)
376 G_BRCOND %3(s1), %bb.2
377 G_BR %bb.3
378
379 bb.2.if.then:
380 G_STORE %4(s32), %5(p0) :: (store 4 into @var2)
381 G_STORE %6(s32), %1(p0) :: (store 4 into @var1)
382 G_STORE %4(s32), %7(p0) :: (store 4 into @var3)
383 G_STORE %6(s32), %1(p0) :: (store 4 into @var1)
384
385 bb.3.if.end:
386 $w0 = COPY %8(s32)
387 RET_ReallyLR implicit $w0
388
389 ...