llvm.org GIT mirror llvm / c7a23a5
[AMDGPU] Refactor waitcnt encoding - Refactor bit packing/unpacking - Calculate bit mask given bit shift and bit width - Introduce function for decoding bits of waitcnt - Introduce function for encoding bits of waitcnt - Introduce function for getting waitcnt mask (instead of using bare numbers) - Introduce function for getting max waitcnt(s) (instead of using bare numbers) Differential Revision: https://reviews.llvm.org/D25298 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283919 91177308-0d34-0410-b5e6-96231b3b80d8 Konstantin Zhuravlyov 3 years ago
5 changed file(s) with 182 addition(s) and 77 deletion(s). Raw diff Collapse all Expand all
20152015 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
20162016 Parser.Lex();
20172017
2018 int CntShift;
2019 int CntMask;
2020
20212018 IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
2022 if (CntName == "vmcnt") {
2023 CntMask = getVmcntMask(IV);
2024 CntShift = getVmcntShift(IV);
2025 } else if (CntName == "expcnt") {
2026 CntMask = getExpcntMask(IV);
2027 CntShift = getExpcntShift(IV);
2028 } else if (CntName == "lgkmcnt") {
2029 CntMask = getLgkmcntMask(IV);
2030 CntShift = getLgkmcntShift(IV);
2031 } else {
2019 if (CntName == "vmcnt")
2020 IntVal = encodeVmcnt(IV, IntVal, CntVal);
2021 else if (CntName == "expcnt")
2022 IntVal = encodeExpcnt(IV, IntVal, CntVal);
2023 else if (CntName == "lgkmcnt")
2024 IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
2025 else
20322026 return true;
2033 }
2034
2035 IntVal &= ~(CntMask << CntShift);
2036 IntVal |= (CntVal << CntShift);
2027
20372028 return false;
20382029 }
20392030
20402031 AMDGPUAsmParser::OperandMatchResultTy
20412032 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
2042 // Disable all counters by default.
2043 // vmcnt [3:0]
2044 // expcnt [6:4]
2045 // lgkmcnt [11:8]
2046 int64_t CntVal = 0xf7f;
2033 IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
2034 int64_t Waitcnt = getWaitcntBitMask(IV);
20472035 SMLoc S = Parser.getTok().getLoc();
20482036
20492037 switch(getLexer().getKind()) {
20502038 default: return MatchOperand_ParseFail;
20512039 case AsmToken::Integer:
20522040 // The operand can be an integer value.
2053 if (getParser().parseAbsoluteExpression(CntVal))
2041 if (getParser().parseAbsoluteExpression(Waitcnt))
20542042 return MatchOperand_ParseFail;
20552043 break;
20562044
20572045 case AsmToken::Identifier:
20582046 do {
2059 if (parseCnt(CntVal))
2047 if (parseCnt(Waitcnt))
20602048 return MatchOperand_ParseFail;
20612049 } while(getLexer().isNot(AsmToken::EndOfStatement));
20622050 break;
20632051 }
2064 Operands.push_back(AMDGPUOperand::CreateImm(this, CntVal, S));
2052 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
20652053 return MatchOperand_Success;
20662054 }
20672055
873873 IsaVersion IV = getIsaVersion(STI.getFeatureBits());
874874
875875 unsigned SImm16 = MI->getOperand(OpNo).getImm();
876 unsigned Vmcnt = (SImm16 >> getVmcntShift(IV)) & getVmcntMask(IV);
877 unsigned Expcnt = (SImm16 >> getExpcntShift(IV)) & getExpcntMask(IV);
878 unsigned Lgkmcnt = (SImm16 >> getLgkmcntShift(IV)) & getLgkmcntMask(IV);
876 unsigned Vmcnt, Expcnt, Lgkmcnt;
877 decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
879878
880879 bool NeedSpace = false;
881880
882 if (Vmcnt != 0xF) {
881 if (Vmcnt != getVmcntBitMask(IV)) {
883882 O << "vmcnt(" << Vmcnt << ')';
884883 NeedSpace = true;
885884 }
886885
887 if (Expcnt != 0x7) {
886 if (Expcnt != getExpcntBitMask(IV)) {
888887 if (NeedSpace)
889888 O << ' ';
890889 O << "expcnt(" << Expcnt << ')';
891890 NeedSpace = true;
892891 }
893892
894 if (Lgkmcnt != 0xF) {
893 if (Lgkmcnt != getLgkmcntBitMask(IV)) {
895894 if (NeedSpace)
896895 O << ' ';
897896 O << "lgkmcnt(" << Lgkmcnt << ')';
6262 const MachineRegisterInfo *MRI;
6363 IsaVersion IV;
6464
65 /// \brief Constant hardware limits
66 static const Counters WaitCounts;
67
6865 /// \brief Constant zero value
6966 static const Counters ZeroCounts;
67
68 /// \brief Hardware limits
69 Counters HardwareLimits;
7070
7171 /// \brief Counter values we have already waited on.
7272 Counters WaitedOn;
172172 return new SIInsertWaits();
173173 }
174174
175 const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
176175 const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
177176
178177 static bool readsVCCZ(unsigned Opcode) {
378377 Ordered[2] = false;
379378
380379 // The values we are going to put into the S_WAITCNT instruction
381 Counters Counts = WaitCounts;
380 Counters Counts = HardwareLimits;
382381
383382 // Do we really need to wait?
384383 bool NeedWait = false;
394393 unsigned Value = LastIssued.Array[i] - Required.Array[i];
395394
396395 // Adjust the value to the real hardware possibilities.
397 Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
396 Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]);
398397
399398 } else
400399 Counts.Array[i] = 0;
412411
413412 // Build the wait instruction
414413 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
415 .addImm(((Counts.Named.VM & getVmcntMask(IV)) << getVmcntShift(IV)) |
416 ((Counts.Named.EXP & getExpcntMask(IV)) << getExpcntShift(IV)) |
417 ((Counts.Named.LGKM & getLgkmcntMask(IV)) << getLgkmcntShift(IV)));
414 .addImm(encodeWaitcnt(IV,
415 Counts.Named.VM,
416 Counts.Named.EXP,
417 Counts.Named.LGKM));
418418
419419 LastOpcodeType = OTHER;
420420 LastInstWritesM0 = false;
442442 unsigned Imm = I->getOperand(0).getImm();
443443 Counters Counts, WaitOn;
444444
445 Counts.Named.VM = (Imm >> getVmcntShift(IV)) & getVmcntMask(IV);
446 Counts.Named.EXP = (Imm >> getExpcntShift(IV)) & getExpcntMask(IV);
447 Counts.Named.LGKM = (Imm >> getLgkmcntShift(IV)) & getLgkmcntMask(IV);
445 Counts.Named.VM = decodeVmcnt(IV, Imm);
446 Counts.Named.EXP = decodeExpcnt(IV, Imm);
447 Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
448448
449449 for (unsigned i = 0; i < 3; ++i) {
450450 if (Counts.Array[i] <= LastIssued.Array[i])
521521 TRI = &TII->getRegisterInfo();
522522 MRI = &MF.getRegInfo();
523523 IV = getIsaVersion(ST->getFeatureBits());
524
525 HardwareLimits.Named.VM = getVmcntBitMask(IV);
526 HardwareLimits.Named.EXP = getExpcntBitMask(IV);
527 HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
524528
525529 WaitedOn = ZeroCounts;
526530 DelayedWaitOn = ZeroCounts;
3232 #undef GET_INSTRINFO_NAMED_OPS
3333 #undef GET_INSTRINFO_ENUM
3434
namespace {

/// \returns Bit mask covering \p Width bits starting at bit \p Shift.
///
/// The mask is built from an unsigned literal so that wide fields never shift
/// into, or overflow, the sign bit of a signed int. \p Width must be smaller
/// than the number of bits in unsigned.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1u) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Only the \p Width bits of \p Dst starting at \p Shift are replaced; all
/// other bits of \p Dst are preserved. Bits of \p Src that do not fit into
/// \p Width bits are dropped. A \p Width of zero leaves \p Dst untouched.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  const unsigned Mask = getBitMask(Shift, Width);
  return (Dst & ~Mask) | ((Src << Shift) & Mask);
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits, moved down so that bit \p Shift of \p Src becomes
/// bit 0 of the result.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift.
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

} // anonymous namespace
77
3578 namespace llvm {
3679 namespace AMDGPU {
3780
160203 return Ints;
161204 }
162205
163 unsigned getVmcntMask(IsaVersion Version) {
164 return 0xf;
165 }
166
167 unsigned getVmcntShift(IsaVersion Version) {
168 return 0;
169 }
170
171 unsigned getExpcntMask(IsaVersion Version) {
172 return 0x7;
173 }
174
175 unsigned getExpcntShift(IsaVersion Version) {
176 return 4;
177 }
178
179 unsigned getLgkmcntMask(IsaVersion Version) {
180 return 0xf;
181 }
182
183 unsigned getLgkmcntShift(IsaVersion Version) {
184 return 8;
206 unsigned getWaitcntBitMask(IsaVersion Version) {
207 unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
208 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
209 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
210 return Vmcnt | Expcnt | Lgkmcnt;
211 }
212
213 unsigned getVmcntBitMask(IsaVersion Version) {
214 return (1 << getVmcntBitWidth()) - 1;
215 }
216
217 unsigned getExpcntBitMask(IsaVersion Version) {
218 return (1 << getExpcntBitWidth()) - 1;
219 }
220
221 unsigned getLgkmcntBitMask(IsaVersion Version) {
222 return (1 << getLgkmcntBitWidth()) - 1;
223 }
224
225 unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
226 return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
227 }
228
229 unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
230 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
231 }
232
233 unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
234 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
235 }
236
237 void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
238 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
239 Vmcnt = decodeVmcnt(Version, Waitcnt);
240 Expcnt = decodeExpcnt(Version, Waitcnt);
241 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
242 }
243
244 unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
245 return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
246 }
247
248 unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
249 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
250 }
251
252 unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
253 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
254 }
255
256 unsigned encodeWaitcnt(IsaVersion Version,
257 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
258 unsigned Waitcnt = getWaitcntBitMask(Version);;
259 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
260 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
261 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
262 return Waitcnt;
185263 }
186264
187265 unsigned getInitialPSInputAddr(const Function &F) {
7575 std::pair Default,
7676 bool OnlyFirstRequired = false);
7777
78 /// \returns VMCNT bit mask for given isa \p Version.
79 unsigned getVmcntMask(IsaVersion Version);
78 /// \returns Waitcnt bit mask for given isa \p Version.
79 unsigned getWaitcntBitMask(IsaVersion Version);
8080
81 /// \returns VMCNT bit shift for given isa \p Version.
82 unsigned getVmcntShift(IsaVersion Version);
81 /// \returns Vmcnt bit mask for given isa \p Version.
82 unsigned getVmcntBitMask(IsaVersion Version);
8383
84 /// \returns EXPCNT bit mask for given isa \p Version.
85 unsigned getExpcntMask(IsaVersion Version);
84 /// \returns Expcnt bit mask for given isa \p Version.
85 unsigned getExpcntBitMask(IsaVersion Version);
8686
87 /// \returns EXPCNT bit shift for given isa \p Version.
88 unsigned getExpcntShift(IsaVersion Version);
87 /// \returns Lgkmcnt bit mask for given isa \p Version.
88 unsigned getLgkmcntBitMask(IsaVersion Version);
8989
90 /// \returns LGKMCNT bit mask for given isa \p Version.
91 unsigned getLgkmcntMask(IsaVersion Version);
90 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
91 unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
9292
93 /// \returns LGKMCNT bit shift for given isa \p Version.
94 unsigned getLgkmcntShift(IsaVersion Version);
93 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
94 unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
95
96 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
97 unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
98
99 /// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
100 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
101 /// \p Lgkmcnt respectively.
102 ///
103 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
104 /// \p Vmcnt = \p Waitcnt[3:0]
105 /// \p Expcnt = \p Waitcnt[6:4]
106 /// \p Lgkmcnt = \p Waitcnt[11:8]
107 void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
108 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
109
110 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
111 unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
112
113 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
114 unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
115
116 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
117 unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
118
119 /// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
120 /// \p Version.
121 ///
122 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
123 /// Waitcnt[3:0] = \p Vmcnt
124 /// Waitcnt[6:4] = \p Expcnt
125 /// Waitcnt[11:8] = \p Lgkmcnt
126 ///
127 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
128 /// isa \p Version.
129 unsigned encodeWaitcnt(IsaVersion Version,
130 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
95131
96132 unsigned getInitialPSInputAddr(const Function &F);
97133