llvm.org GIT mirror llvm / d369820
AMDGPU/InsertWaitcnts: Cleanup some old cruft (NFCI) Summary: Remove redundant logic and simplify control flow. Reviewers: msearles, rampitec, scott.linder, kanarayan Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D54086 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346363 91177308-0d34-0410-b5e6-96231b3b80d8 Nicolai Haehnle 10 months ago
1 changed file(s) with 78 addition(s) and 98 deletion(s). Raw diff Collapse all Expand all
879879 // Start with an assumption that there is no need to emit.
880880 unsigned int EmitWaitcnt = 0;
881881
882 // No need to wait before phi. If a phi-move exists, then the wait should
883 // have been inserted before the move. If a phi-move does not exist, then
884 // the wait should be inserted before the real use. The same is true for
885 // sc-merge. It is not a coincidence that all these cases correspond to the
886 // instructions that are skipped in the assembling loop.
887 bool NeedLineMapping = false; // TODO: Check on this.
888
889882 // ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
890883 bool ForceEmitZeroWaitcnt = false;
891884
892885 setForceEmitWaitcnt();
893886 bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
894887
895 if (MI.isDebugInstr() &&
896 // TODO: any other opcode?
897 !NeedLineMapping) {
888 if (MI.isDebugInstr())
898889 return;
899 }
900890
901891 // See if an s_waitcnt is forced at block entry, or is needed at
902892 // program end.
11401130 if (EmitWaitcnt || IsForceEmitWaitcnt) {
11411131 int CntVal[NUM_INST_CNTS];
11421132
1143 bool UseDefaultWaitcntStrategy = true;
11441133 if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
11451134 // Force all waitcnts to 0.
11461135 for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
11501139 CntVal[VM_CNT] = 0;
11511140 CntVal[EXP_CNT] = 0;
11521141 CntVal[LGKM_CNT] = 0;
1153 UseDefaultWaitcntStrategy = false;
1154 }
1155
1156 if (UseDefaultWaitcntStrategy) {
1142 } else {
11571143 for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
11581144 T = (enum InstCounterType)(T + 1)) {
11591145 if (EmitWaitcnt & CNT_MASK(T)) {
11771163 }
11781164 }
11791165
1180 // If we are not waiting on any counter we can skip the wait altogether.
1181 if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) {
1182 MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
1183 int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
1184 if (!OldWaitcnt ||
1185 (AMDGPU::decodeVmcnt(IV, Imm) !=
1186 (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
1187 (AMDGPU::decodeExpcnt(IV, Imm) !=
1188 (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
1189 (AMDGPU::decodeLgkmcnt(IV, Imm) !=
1190 (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
1191 MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
1192 if (ContainingLoop) {
1193 MachineBasicBlock *TBB = ContainingLoop->getHeader();
1194 BlockWaitcntBrackets *ScoreBracket =
1195 BlockWaitcntBracketsMap[TBB].get();
1196 if (!ScoreBracket) {
1197 assert(!BlockVisitedSet.count(TBB));
1198 BlockWaitcntBracketsMap[TBB] =
1199 llvm::make_unique(ST);
1200 ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
1201 }
1202 ScoreBracket->setRevisitLoop(true);
1203 LLVM_DEBUG(dbgs()
1204 << "set-revisit2: Block"
1205 << ContainingLoop->getHeader()->getNumber() << '\n';);
1166 MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
1167 int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
1168 if (!OldWaitcnt ||
1169 (AMDGPU::decodeVmcnt(IV, Imm) !=
1170 (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
1171 (AMDGPU::decodeExpcnt(IV, Imm) !=
1172 (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
1173 (AMDGPU::decodeLgkmcnt(IV, Imm) !=
1174 (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
1175 MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
1176 if (ContainingLoop) {
1177 MachineBasicBlock *TBB = ContainingLoop->getHeader();
1178 BlockWaitcntBrackets *ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
1179 if (!ScoreBracket) {
1180 assert(!BlockVisitedSet.count(TBB));
1181 BlockWaitcntBracketsMap[TBB] =
1182 llvm::make_unique(ST);
1183 ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
12061184 }
1207 }
1208
1209 // Update an existing waitcount, or make a new one.
1210 unsigned Enc = AMDGPU::encodeWaitcnt(IV,
1185 ScoreBracket->setRevisitLoop(true);
1186 LLVM_DEBUG(dbgs() << "set-revisit2: Block"
1187 << ContainingLoop->getHeader()->getNumber() << '\n';);
1188 }
1189 }
1190
1191 // Update an existing waitcount, or make a new one.
1192 unsigned Enc = AMDGPU::encodeWaitcnt(IV,
12111193 ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
12121194 ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
12131195 ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
1214 // We don't remove waitcnts that existed prior to the waitcnt
1215 // pass. Check if the waitcnt to-be-inserted can be avoided
1216 // or if the prev waitcnt can be updated.
1217 bool insertSWaitInst = true;
1218 for (MachineBasicBlock::iterator I = MI.getIterator(),
1219 B = MI.getParent()->begin();
1220 insertSWaitInst && I != B; --I) {
1221 if (I == MI.getIterator())
1222 continue;
1223
1224 switch (I->getOpcode()) {
1225 case AMDGPU::S_WAITCNT:
1226 if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
1227 insertSWaitInst = false;
1228 else if (!OldWaitcnt) {
1229 OldWaitcnt = &*I;
1230 Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
1231 }
1232 break;
1233 // TODO: skip over instructions which never require wait.
1196 // We don't remove waitcnts that existed prior to the waitcnt
1197 // pass. Check if the waitcnt to-be-inserted can be avoided
1198 // or if the prev waitcnt can be updated.
1199 bool insertSWaitInst = true;
1200 for (MachineBasicBlock::iterator I = MI.getIterator(),
1201 B = MI.getParent()->begin();
1202 insertSWaitInst && I != B; --I) {
1203 if (I == MI.getIterator())
1204 continue;
1205
1206 switch (I->getOpcode()) {
1207 case AMDGPU::S_WAITCNT:
1208 if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
1209 insertSWaitInst = false;
1210 else if (!OldWaitcnt) {
1211 OldWaitcnt = &*I;
1212 Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
12341213 }
12351214 break;
1236 }
1237 if (insertSWaitInst) {
1238 if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
1239 if (ForceEmitZeroWaitcnts)
1240 LLVM_DEBUG(
1241 dbgs()
1242 << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
1243 if (IsForceEmitWaitcnt)
1244 LLVM_DEBUG(dbgs()
1245 << "Force emit a s_waitcnt due to debug counter\n");
1246
1247 OldWaitcnt->getOperand(0).setImm(Enc);
1248 if (!OldWaitcnt->getParent())
1249 MI.getParent()->insert(MI, OldWaitcnt);
1250
1251 LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
1252 << "Old Instr: " << MI << '\n'
1253 << "New Instr: " << *OldWaitcnt << '\n');
1254 } else {
1255 auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
1256 MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
1215 // TODO: skip over instructions which never require wait.
1216 }
1217 break;
1218 }
1219 if (insertSWaitInst) {
1220 if (OldWaitcnt) {
1221 assert(OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT);
1222 if (ForceEmitZeroWaitcnts)
1223 LLVM_DEBUG(dbgs()
1224 << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
1225 if (IsForceEmitWaitcnt)
1226 LLVM_DEBUG(dbgs() << "Force emit a s_waitcnt due to debug counter\n");
1227
1228 OldWaitcnt->getOperand(0).setImm(Enc);
1229 if (!OldWaitcnt->getParent())
1230 MI.getParent()->insert(MI, OldWaitcnt);
1231
1232 LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
1233 << "Old Instr: " << MI << '\n'
1234 << "New Instr: " << *OldWaitcnt << '\n');
1235 } else {
1236 auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
1237 MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
12571238 .addImm(Enc);
1258 TrackedWaitcntSet.insert(SWaitInst);
1259
1260 LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
1261 << "Old Instr: " << MI << '\n'
1262 << "New Instr: " << *SWaitInst << '\n');
1263 }
1264 }
1265
1266 if (CntVal[EXP_CNT] == 0) {
1267 ScoreBrackets->setMixedExpTypes(false);
1268 }
1239 TrackedWaitcntSet.insert(SWaitInst);
1240
1241 LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
1242 << "Old Instr: " << MI << '\n'
1243 << "New Instr: " << *SWaitInst << '\n');
1244 }
1245 }
1246
1247 if (CntVal[EXP_CNT] == 0) {
1248 ScoreBrackets->setMixedExpTypes(false);
12691249 }
12701250 }
12711251 }