llvm.org GIT mirror llvm / 0b58446
MC: Allow getMaxInstLength to depend on the subtarget. Keep the subtarget argument optional in case this is ever needed in some global context. Currently it's only used for getting an upper bound on inline asm code size. For AMDGPU, gfx10 increases the maximum instruction size to 20 bytes. This avoids penalizing older subtargets when estimating code size, and avoids making some annoying branch relaxation test adjustments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361405 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 9 months ago
11 changed file(s) with 91 addition(s) and 19 deletion(s). Raw diff Collapse all Expand all
12591259
12601260 /// Measure the specified inline asm to determine an approximation of its
12611261 /// length.
1262 virtual unsigned getInlineAsmLength(const char *Str,
1263 const MCAsmInfo &MAI) const;
1262 virtual unsigned getInlineAsmLength(
1263 const char *Str, const MCAsmInfo &MAI,
1264 const TargetSubtargetInfo *STI = nullptr) const;
12641265
12651266 /// Allocate and return a hazard recognizer to use for this target when
12661267 /// scheduling the machine instructions before register allocation.
2626 class MCExpr;
2727 class MCSection;
2828 class MCStreamer;
29 class MCSubtargetInfo;
2930 class MCSymbol;
3031
3132 namespace WinEH {
472473 bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; }
473474 bool hasCOFFAssociativeComdats() const { return HasCOFFAssociativeComdats; }
474475 bool hasCOFFComdatConstants() const { return HasCOFFComdatConstants; }
475 unsigned getMaxInstLength() const { return MaxInstLength; }
476
477 /// Returns the maximum possible encoded instruction size in bytes. If \p STI
478 /// is null, this should be the maximum size for any subtarget.
/// This base implementation does not inspect \p STI and always returns
/// MaxInstLength; it is virtual so that a target can override it.
479 virtual unsigned getMaxInstLength(const MCSubtargetInfo *STI = nullptr) const {
480 return MaxInstLength;
481 }
482
476483 unsigned getMinInstAlignment() const { return MinInstAlignment; }
477484 bool getDollarIsPC() const { return DollarIsPC; }
478485 const char *getSeparatorString() const { return SeparatorString; }
8484 /// simple--i.e. not a logical or arithmetic expression--size values without
8585 /// the optional fill value. This is primarily used for creating arbitrary
8686 /// sized inline asm blocks for testing purposes.
87 unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
88 const MCAsmInfo &MAI) const {
87 unsigned TargetInstrInfo::getInlineAsmLength(
88 const char *Str,
89 const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const {
8990 // Count the number of instructions in the asm.
9091 bool AtInsnStart = true;
9192 unsigned Length = 0;
93 const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
9294 for (; *Str; ++Str) {
9395 if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
9496 strlen(MAI.getSeparatorString())) == 0) {
100102 }
101103
102104 if (AtInsnStart && !std::isspace(static_cast(*Str))) {
103 unsigned AddLength = MAI.getMaxInstLength();
105 unsigned AddLength = MaxInstLength;
104106 if (strncmp(Str, ".space", 6) == 0) {
105107 char *EStr;
106108 int SpaceSize;
2727 #include "llvm/ADT/ArrayRef.h"
2828 #include "llvm/ADT/Twine.h"
2929 #include "llvm/BinaryFormat/ELF.h"
30 #include "llvm/MC/MCAsmInfo.h"
3031 #include "llvm/MC/MCContext.h"
3132 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
3233 #include "llvm/MC/MCExpr.h"
5556
5657 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
5758
// Constructs the disassembler and caches, in TargetMaxInstBytes, the maximum
// instruction length that the context's MCAsmInfo reports for STI.
59 AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
60 MCContext &Ctx,
61 MCInstrInfo const *MCII) :
62 MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
63 TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {}
64
5865 inline static MCDisassembler::DecodeStatus
5966 addOperand(MCInst &Inst, const MCOperand& Opnd) {
6067 Inst.addOperand(Opnd);
185192 if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10())
186193 report_fatal_error("Disassembly not yet supported for subtarget");
187194
188 unsigned MaxInstBytesNum = (std::min)(
189 STI.getFeatureBits()[AMDGPU::FeatureGFX10] ? (size_t) 20 :
190 STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal] ? (size_t) 12 : (size_t)8,
191 Bytes_.size());
195 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
192196 Bytes = Bytes_.slice(0, MaxInstBytesNum);
193197
194198 DecodeStatus Res = MCDisassembler::Fail;
4040 private:
4141 std::unique_ptr const MCII;
4242 const MCRegisterInfo &MRI;
43 const unsigned TargetMaxInstBytes;
4344 mutable ArrayRef Bytes;
4445 mutable uint32_t Literal;
4546 mutable bool HasLiteral;
4647
4748 public:
4849 AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
49 MCInstrInfo const *MCII) :
50 MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()) {}
51
50 MCInstrInfo const *MCII);
5251 ~AMDGPUDisassembler() override = default;
5352
5453 DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
88
99 #include "AMDGPUMCAsmInfo.h"
1010 #include "llvm/ADT/Triple.h"
11 #include "llvm/MC/MCSubtargetInfo.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1113
1214 using namespace llvm;
1315
1719 HasSingleParameterDotFile = false;
1820 //===------------------------------------------------------------------===//
1921 MinInstAlignment = 4;
20 MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 8 : 16;
22
23 // This is the maximum instruction encoded size for gfx10. With a known
24 // subtarget, it can be reduced to 8 bytes.
25 MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 20 : 16;
2126 SeparatorString = "\n";
2227 CommentString = ";";
2328 PrivateLabelPrefix = "";
4348 SectionName == ".hsarodata_readonly_agent" ||
4449 MCAsmInfo::shouldOmitSectionDirective(SectionName);
4550 }
51
// Returns the maximum encoded instruction size in bytes for the given
// subtarget. With no subtarget, or for an r600 triple, this falls back to the
// MaxInstLength value stored in this object.
52 unsigned AMDGPUMCAsmInfo::getMaxInstLength(const MCSubtargetInfo *STI) const {
53 if (!STI || STI->getTargetTriple().getArch() == Triple::r600)
54 return MaxInstLength;
55
56 // Maximum for NSA-encoded image instructions. Checked first, so it takes
// precedence over the VOP3 literal case below.
57 if (STI->getFeatureBits()[AMDGPU::FeatureNSAEncoding])
58 return 20;
59
60 // 64-bit instruction with 32-bit literal.
61 if (STI->getFeatureBits()[AMDGPU::FeatureVOP3Literal])
62 return 12;
63
// Neither of the features above is present.
64 return 8;
65 }
2626 public:
2727 explicit AMDGPUMCAsmInfo(const Triple &TT);
2828 bool shouldOmitSectionDirective(StringRef SectionName) const override;
29 unsigned getMaxInstLength(const MCSubtargetInfo *STI) const override;
2930 };
3031 } // namespace llvm
3132 #endif
55775577 case TargetOpcode::INLINEASM_BR: {
55785578 const MachineFunction *MF = MI.getParent()->getParent();
55795579 const char *AsmStr = MI.getOperand(0).getSymbolName();
5580 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
5580 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(),
5581 &MF->getSubtarget());
55815582 }
55825583 default:
55835584 return DescSize;
17111711 /// Hexagon counts the number of ##'s and adjusts for that many
17121712 /// constant extenders.
17131713 unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
1714 const MCAsmInfo &MAI) const {
1714 const MCAsmInfo &MAI,
1715 const TargetSubtargetInfo *STI) const {
17151716 StringRef AStr(Str);
17161717 // Count the number of instructions in the asm.
17171718 bool atInsnStart = true;
17181719 unsigned Length = 0;
1720 const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
17191721 for (; *Str; ++Str) {
17201722 if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
17211723 strlen(MAI.getSeparatorString())) == 0)
17221724 atInsnStart = true;
17231725 if (atInsnStart && !std::isspace(static_cast(*Str))) {
1724 Length += MAI.getMaxInstLength();
1726 Length += MaxInstLength;
17251727 atInsnStart = false;
17261728 }
17271729 if (atInsnStart && strncmp(Str, MAI.getCommentString().data(),
263263
264264 /// Measure the specified inline asm to determine an approximation of its
265265 /// length.
266 unsigned getInlineAsmLength(const char *Str,
267 const MCAsmInfo &MAI) const override;
266 unsigned getInlineAsmLength(
267 const char *Str,
268 const MCAsmInfo &MAI,
269 const TargetSubtargetInfo *STI = nullptr) const override;
268270
269271 /// Allocate and return a hazard recognizer to use for this target when
270272 /// scheduling the machine instructions after register allocation.
0 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
2
3 ; Make sure the code size estimate for inline asm is 20 bytes per
4 ; instruction on gfx10, rather than 8 as in previous generations.
5
6 ; GCN-LABEL: {{^}}long_forward_branch_gfx10only:
7 ; GFX9: s_cmp_eq_u32
8 ; GFX9-NEXT: s_cbranch_scc1
9
10 ; GFX10: s_cmp_eq_u32
11 ; GFX10-NEXT: s_cbranch_scc0
12 ; GFX10: s_getpc_b64
13 ; GFX10: s_add_u32
14 ; GFX10: s_addc_u32
15 ; GFX10: s_setpc_b64
16 define amdgpu_kernel void @long_forward_branch_gfx10only(i32 addrspace(1)* %arg, i32 %cnd) #0 {
17 bb0:
18 %cmp = icmp eq i32 %cnd, 0
19 br i1 %cmp, label %bb3, label %bb2 ; +9 dword branch
20
21 bb2:
22 ; Two inline asm instructions: estimated as 40 bytes on gfx10 (requiring a
23 ; long branch), but 16 bytes on gfx9 (allowing a short branch).
24 call void asm sideeffect
25 "v_nop_e64
26 v_nop_e64", ""() #0
27 br label %bb3
28
29 bb3:
30 store volatile i32 %cnd, i32 addrspace(1)* %arg
31 ret void
32 }