llvm.org GIT mirror llvm / bf39a84
[AMDGPU] Simplify negated condition

Optimize sequence:

   %sel = V_CNDMASK_B32_e64 0, 1, %cc
   %cmp = V_CMP_NE_U32 1, %sel
   $vcc = S_AND_B64 $exec, %cmp
   S_CBRANCH_VCC[N]Z
=>
   $vcc = S_ANDN2_B64 $exec, %cc
   S_CBRANCH_VCC[N]Z

This is the negation pattern inserted by DAGCombiner::visitBRCOND() in
rebuildSetCC().

Differential Revision: https://reviews.llvm.org/D55402

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349003 91177308-0d34-0410-b5e6-96231b3b80d8

Stanislav Mekhanoshin, 10 months ago
5 changed files with 727 additions and 0 deletions.
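Why the S_AND_B64 with $exec is a required part of the pattern: V_CNDMASK_B32 writes zeroes to inactive lanes, so V_CMP_NE_U32 1, %sel yields 1 in every inactive lane, and only the AND with exec makes the original and rewritten sequences agree on the final mask. As a quick sanity check, here is a small standalone C++ model of the lane arithmetic (a toy, not code from this patch; it assumes an 8-lane wave where bit i of each mask is lane i):

// Exhaustively verify that S_AND_B64 $exec, %cmp == S_ANDN2_B64 $exec, %cc
// for every exec mask and condition mask of an 8-lane toy wave.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t Exec = 0; Exec < 256; ++Exec) {
    for (uint32_t CC = 0; CC < 256; ++CC) {
      uint32_t Cmp = 0; // lane mask produced by V_CMP_NE_U32 1, %sel
      for (unsigned Lane = 0; Lane < 8; ++Lane) {
        bool Active = (Exec >> Lane) & 1;
        // V_CNDMASK_B32_e64 0, 1, %cc: cc ? 1 : 0 in active lanes,
        // and 0 in inactive lanes.
        uint32_t Sel = Active ? ((CC >> Lane) & 1) : 0;
        if (Sel != 1) // V_CMP_NE_U32 against the immediate 1
          Cmp |= 1u << Lane;
      }
      // Original: $vcc = S_AND_B64 $exec, %cmp.
      // Rewritten: $vcc = S_ANDN2_B64 $exec, %cc.
      assert((Exec & Cmp) == (Exec & ~CC));
    }
  }
  std::puts("original and rewritten sequences agree");
}

Dropping the `Exec &` from either side makes the assertion fire as soon as any lane is inactive, which is exactly why the pass below refuses to match unless one operand of the S_AND_B64 is $exec.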

SIOptimizeExecMaskingPreRA.cpp:

  ...
  return SaveExecInst;
}

// Optimize sequence
//    %sel = V_CNDMASK_B32_e64 0, 1, %cc
//    %cmp = V_CMP_NE_U32 1, %sel
//    $vcc = S_AND_B64 $exec, %cmp
//    S_CBRANCH_VCC[N]Z
// =>
//    $vcc = S_ANDN2_B64 $exec, %cc
//    S_CBRANCH_VCC[N]Z
//
// This is the negation pattern inserted by DAGCombiner::visitBRCOND() in
// rebuildSetCC(). We start the search from the S_CBRANCH to avoid an
// exhaustive scan, but only the first three instructions are really needed.
// The S_AND_B64 with exec is a required part of the pattern since
// V_CNDMASK_B32 writes zeroes for inactive lanes.
//
// Returns the %cc register on success.
static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
                                     const GCNSubtarget &ST,
                                     MachineRegisterInfo &MRI,
                                     LiveIntervals *LIS) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const unsigned AndOpc = AMDGPU::S_AND_B64;
  const unsigned Andn2Opc = AMDGPU::S_ANDN2_B64;
  const unsigned CondReg = AMDGPU::VCC;
  const unsigned ExecReg = AMDGPU::EXEC;

  auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
                           unsigned Opc = MI.getOpcode();
                           return Opc == AMDGPU::S_CBRANCH_VCCZ ||
                                  Opc == AMDGPU::S_CBRANCH_VCCNZ; });
  if (I == MBB.terminators().end())
    return AMDGPU::NoRegister;

  auto *And = TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister,
                                   *I, MRI, LIS);
  if (!And || And->getOpcode() != AndOpc ||
      !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
    return AMDGPU::NoRegister;

  MachineOperand *AndCC = &And->getOperand(1);
  unsigned CmpReg = AndCC->getReg();
  unsigned CmpSubReg = AndCC->getSubReg();
  if (CmpReg == ExecReg) {
    AndCC = &And->getOperand(2);
    CmpReg = AndCC->getReg();
    CmpSubReg = AndCC->getSubReg();
  } else if (And->getOperand(2).getReg() != ExecReg) {
    return AMDGPU::NoRegister;
  }

  auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, MRI, LIS);
  if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
                Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
      Cmp->getParent() != And->getParent())
    return AMDGPU::NoRegister;

  MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
  MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
  if (Op1->isImm() && Op2->isReg())
    std::swap(Op1, Op2);
  if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
    return AMDGPU::NoRegister;

  unsigned SelReg = Op1->getReg();
  auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, MRI, LIS);
  if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
    return AMDGPU::NoRegister;

  Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
  Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
  MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
  if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
      Op1->getImm() != 0 || Op2->getImm() != 1)
    return AMDGPU::NoRegister;

  LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t'
                    << *Cmp << '\t' << *And);

  unsigned CCReg = CC->getReg();
  LIS->RemoveMachineInstrFromMaps(*And);
  MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
                                TII->get(Andn2Opc), And->getOperand(0).getReg())
                            .addReg(ExecReg)
                            .addReg(CCReg, /*Flags=*/0, CC->getSubReg());
  And->eraseFromParent();
  LIS->InsertMachineInstrInMaps(*Andn2);

  LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');

  // Try to remove the compare. The compare result must not be used between
  // the cmp and the s_and_b64 if it is VCC, and must be entirely unused if
  // it is any other register.
  if ((TargetRegisterInfo::isVirtualRegister(CmpReg) &&
       MRI.use_nodbg_empty(CmpReg)) ||
      (CmpReg == CondReg &&
       std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
                    [TRI, CondReg](const MachineInstr &MI) {
                      return MI.readsRegister(CondReg, TRI); }))) {
    LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');

    LIS->RemoveMachineInstrFromMaps(*Cmp);
    Cmp->eraseFromParent();

    // Try to remove v_cndmask_b32.
    if (TargetRegisterInfo::isVirtualRegister(SelReg) &&
        MRI.use_nodbg_empty(SelReg)) {
      LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');

      LIS->RemoveMachineInstrFromMaps(*Sel);
      Sel->eraseFromParent();
    }
  }

  return CCReg;
}

bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  ...
  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {

    if (unsigned Reg = optimizeVcndVcmpPair(MBB, ST, MRI, LIS)) {
      RecalcRegs.insert(Reg);
      RecalcRegs.insert(AMDGPU::VCC_LO);
      RecalcRegs.insert(AMDGPU::VCC_HI);
      RecalcRegs.insert(AMDGPU::SCC);
      Changed = true;
    }

    // Try to remove unneeded instructions before s_endpgm.
    if (MBB.succ_empty()) {
      ...

SIRegisterInfo.cpp:

  ...
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

  ...
    llvm_unreachable("not implemented");
  }
}

// Find reaching register definition.
MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
                                              MachineInstr &Use,
                                              MachineRegisterInfo &MRI,
                                              LiveIntervals *LIS) const {
  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
  SlotIndex DefIdx;

  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    if (!LIS->hasInterval(Reg))
      return nullptr;
    LiveInterval &LI = LIS->getInterval(Reg);
    LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
                                  : MRI.getMaxLaneMaskForVReg(Reg);
    VNInfo *V = nullptr;
    if (LI.hasSubRanges()) {
      for (auto &S : LI.subranges()) {
        if ((S.LaneMask & SubLanes) == SubLanes) {
          V = S.getVNInfoAt(UseIdx);
          break;
        }
      }
    } else {
      V = LI.getVNInfoAt(UseIdx);
    }
    if (!V)
      return nullptr;
    DefIdx = V->def;
  } else {
    // Find last def.
    for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
      LiveRange &LR = LIS->getRegUnit(*Units);
      if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
        if (!DefIdx.isValid() ||
            MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
                          LIS->getInstructionFromIndex(V->def)))
          DefIdx = V->def;
      } else {
        return nullptr;
      }
    }
  }

  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);

  if (!Def || !MDT.dominates(Def, &Use))
    return nullptr;

  assert(Def->modifiesRegister(Reg, this));

  return Def;
}
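A note on the physical-register path above: a register such as $vcc is covered by multiple register units ($vcc_lo and $vcc_hi), each with its own live range, so the loop keeps, over all units, the def that every other def dominates (i.e. the latest one), and requires every unit to be live at the use. The following is a minimal toy model of that selection rule, illustrative only: plain integer positions stand in for SlotIndexes, and in straight-line code "dominates" reduces to "comes earlier".

// Toy reaching-def search over register units in straight-line code.
#include <cstdio>
#include <optional>
#include <vector>

// Latest def of one unit strictly before the use, if any: the unit's value
// that is live at the use.
static std::optional<int> liveDefAt(const std::vector<int> &Defs, int UseIdx) {
  std::optional<int> Best;
  for (int D : Defs)
    if (D < UseIdx && (!Best || D > *Best))
      Best = D;
  return Best;
}

int main() {
  // $vcc = {$vcc_lo, $vcc_hi}: both units defined at position 1 (a V_CMP
  // writing the whole $vcc), then $vcc_lo alone redefined at position 5
  // (e.g. "$vcc_lo = COPY $sgpr0").
  std::vector<std::vector<int>> UnitDefs = {{1, 5}, {1}};
  int UseIdx = 7;

  std::optional<int> DefIdx;
  for (const auto &Defs : UnitDefs) {
    std::optional<int> D = liveDefAt(Defs, UseIdx);
    if (!D)
      return 1; // some unit has no live value: no single reaching def
    // Keep the most-dominated (here: latest) def across units.
    if (!DefIdx || *D > *DefIdx)
      DefIdx = *D;
  }
  std::printf("reaching def of the full register is at %d\n", *DefIdx); // 5
}

In this toy the partial redefinition of one unit becomes the reaching def of the whole register, mirroring how a `$vcc_lo = COPY $sgpr0` between the V_CMP and the S_AND_B64 makes findReachingDef return the COPY instead of the compare, so the negated_cond_vop2_redef_vcc1 test below is correctly left untransformed.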

SIRegisterInfo.h:

  ...
  const TargetRegisterClass *
  getConstrainedRegClassForOperand(const MachineOperand &MO,
                                   const MachineRegisterInfo &MRI) const override;

  // Find reaching register definition.
  MachineInstr *findReachingDef(unsigned Reg, unsigned SubReg,
                                MachineInstr &Use,
                                MachineRegisterInfo &MRI,
                                LiveIntervals *LIS) const;

private:
  void buildSpillLoadStore(MachineBasicBlock::iterator MI,
  ...

New test (MIR):

# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra -o - %s | FileCheck -check-prefix=GCN %s

# GCN: name: negated_cond_vop2
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop2
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop3
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop3
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
    $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop2_redef_vcc1
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
# GCN-NEXT: $vcc_lo = COPY $sgpr0
# GCN-NEXT: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop2_redef_vcc1
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
    $vcc_lo = COPY $sgpr0
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop2_redef_vcc2
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
# GCN-NEXT: $vcc_hi = COPY $sgpr0
# GCN-NEXT: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop2_redef_vcc2
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
    $vcc_hi = COPY $sgpr0
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop3_redef_cmp
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
# GCN-NEXT: %2.sub1:sreg_64_xexec = COPY $sgpr0
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop3_redef_cmp
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
    %2.sub1 = COPY $sgpr0
    $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_undef_vcc
# GCN: $vcc = S_AND_B64 $exec, undef $vcc, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_undef_vcc
body: |
  bb.0:
    $vcc = S_AND_B64 $exec, undef $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop3_imp_vcc
# GCN: $vcc = IMPLICIT_DEF
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop3_imp_vcc
body: |
  bb.0:
    $vcc = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, $vcc, implicit $exec
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
    $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop2_imp_vcc
# GCN: $vcc = IMPLICIT_DEF
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop2_imp_vcc
body: |
  bb.0:
    $vcc = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, $vcc, implicit $exec
    V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 killed $vcc, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop3_redef_sel
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN-NEXT: %1:vgpr_32 = COPY $vgpr0
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop3_redef_sel
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    %1:vgpr_32 = COPY $vgpr0
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
    $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop2_used_sel
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop2_used_sel
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    $vgpr0 = COPY %1
    S_ENDPGM
...

# GCN: name: negated_cond_vop2_used_vcc
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
# GCN-NEXT: $sgpr0_sgpr1 = COPY $vcc
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop2_used_vcc
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
    $sgpr0_sgpr1 = COPY $vcc
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop3_sel_wrong_subreg1
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop3_sel_wrong_subreg1
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1.sub1 = IMPLICIT_DEF
    %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
    $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop3_sel_wrong_subreg2
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop3_sel_wrong_subreg2
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    %1.sub1 = IMPLICIT_DEF
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
    $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop3_sel_right_subreg1
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop3_sel_right_subreg1
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1.sub1 = IMPLICIT_DEF
    %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub0, 1, implicit $exec
    $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop3_sel_right_subreg2
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop3_sel_right_subreg2
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    %1.sub1 = IMPLICIT_DEF
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub0, 1, implicit $exec
    $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop3_sel_subreg_overlap
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN-NEXT: %1.sub2_sub3:vreg_128 = IMPLICIT_DEF
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub2, 1, implicit $exec
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
name: negated_cond_vop3_sel_subreg_overlap
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    %1.sub2_sub3 = IMPLICIT_DEF
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub2, 1, implicit $exec
    $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    S_BRANCH %bb.0

  bb.2:
    S_ENDPGM
...

# GCN: name: negated_cond_vop2_dominated_blocks
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc
---
name: negated_cond_vop2_dominated_blocks
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec

  bb.1:
    V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
    S_BRANCH %bb.2

  bb.2:
    S_BRANCH %bb.1

  bb.3:
    S_ENDPGM
...

# GCN: name: negated_cond_vop2_different_blocks_cmp_and
# GCN: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
# GCN: $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc
---
name: negated_cond_vop2_different_blocks_cmp_and
body: |
  bb.0:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec

  bb.1:
    $vcc = S_AND_B64 $exec, killed %2, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
    S_BRANCH %bb.2

  bb.2:
    S_BRANCH %bb.1

  bb.3:
    S_ENDPGM
...

# GCN: name: negated_cond_vop2_not_dominated_blocks
# GCN: V_CNDMASK_B32_e64 0, 1,
# GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
---
name: negated_cond_vop2_not_dominated_blocks
body: |
  bb.0:
    $vcc = IMPLICIT_DEF
    %1 = IMPLICIT_DEF
    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
    S_BRANCH %bb.1

  bb.1:
    %0:sreg_64_xexec = IMPLICIT_DEF
    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec

  bb.2:
    V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.4, implicit killed $vcc
    S_BRANCH %bb.3

  bb.3:
    S_BRANCH %bb.2

  bb.4:
    S_ENDPGM
...

New test (LLVM IR):

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}negated_cond:
; GCN: BB0_1:
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
; GCN: BB0_2:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_cmp
; GCN: s_andn2_b64 vcc, exec, [[CC]]
; GCN: s_cbranch_vccnz BB0_4
define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) {
bb:
  br label %bb1

bb1:
  %tmp1 = load i32, i32 addrspace(1)* %arg1
  %tmp2 = icmp eq i32 %tmp1, 0
  br label %bb2

bb2:
  %tmp3 = phi i32 [ 0, %bb1 ], [ %tmp6, %bb4 ]
  %tmp4 = shl i32 %tmp3, 5
  br i1 %tmp2, label %bb3, label %bb4

bb3:
  %tmp5 = add i32 %tmp4, 1
  br label %bb4

bb4:
  %tmp6 = phi i32 [ %tmp5, %bb3 ], [ %tmp4, %bb2 ]
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tmp6
  store i32 0, i32 addrspace(1)* %gep
  %tmp7 = icmp eq i32 %tmp6, 32
  br i1 %tmp7, label %bb1, label %bb2
}

; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
; GCN: BB1_1:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_cmp
; GCN: s_andn2_b64 vcc, exec, [[CC]]
; GCN: s_cbranch_vccz BB1_3
define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) {
bb:
  br label %bb2

bb2:
  %tmp1 = load i32, i32 addrspace(1)* %arg1
  %tmp2 = icmp eq i32 %tmp1, 0
  br label %bb4

bb3:
  ret void

bb4:
  %tmp3 = phi i32 [ 0, %bb2 ], [ %tmp7, %bb7 ]
  %tmp4 = shl i32 %tmp3, 5
  br i1 %tmp2, label %bb5, label %bb6

bb5:
  %tmp5 = add i32 %tmp4, 1
  br label %bb7

bb6:
  %tmp6 = add i32 %tmp3, 1
  br label %bb7

bb7:
  %tmp7 = phi i32 [ %tmp5, %bb5 ], [ %tmp6, %bb6 ]
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tmp7
  store i32 0, i32 addrspace(1)* %gep
  %tmp8 = icmp eq i32 %tmp7, 32
  br i1 %tmp8, label %bb3, label %bb4
}