llvm.org GIT mirror llvm / a12a087
[AMDGPU] Fix for branch offset hardware workaround Summary: This fixes a hardware bug that makes a branch offset of 0x3f unsafe. This replaces the 32 bit branch with offset 0x3f with a 64 bit instruction that includes the same 32 bit branch and the encoding for an s_nop 0 to follow. The relaxer then modifies the offsets accordingly. Change-Id: I10b7aed99d651f8159401b01bb421f105fa6288e Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63494 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364451 91177308-0d34-0410-b5e6-96231b3b80d8 Ryan Taylor 4 months ago
10 changed file(s) with 381 addition(s) and 24 deletion(s). Raw diff Collapse all Expand all
190190 "HasFlatSegmentOffsetBug",
191191 "true",
192192 "GFX10 bug, inst_offset ignored in flat segment"
193 >;
194
195 def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug", // feature name string
196 "HasOffset3fBug", // subtarget member this feature sets
197 "true", // value given to that member
198 "Branch offset of 3f hardware bug" // description string
193199 >;
194200
195201 class SubtargetFeatureLDSBankCount : SubtargetFeature <
766772 FeatureVcmpxExecWARHazard,
767773 FeatureLdsBranchVmemWARHazard,
768774 FeatureNSAtoVMEMBug,
775 FeatureOffset3fBug,
769776 FeatureFlatSegmentOffsetBug
770777 ];
771778 }
10671074 def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
10681075 AssemblerPredicate<"FeatureDot6Insts">;
10691076
1077 def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">, // true when the subtarget has the bug, matching the predicate's name
1078 AssemblerPredicate<"FeatureOffset3fBug">;
1079
10701080 def EnableLateCFGStructurize : Predicate<
10711081 "EnableLateStructurizeCFG">;
10721082
260260 HasVcmpxExecWARHazard(false),
261261 HasLdsBranchVmemWARHazard(false),
262262 HasNSAtoVMEMBug(false),
263 HasOffset3fBug(false),
263264 HasFlatSegmentOffsetBug(false),
264265
265266 FeatureDisable(false),
367367 bool HasVcmpxExecWARHazard;
368368 bool HasLdsBranchVmemWARHazard;
369369 bool HasNSAtoVMEMBug;
370 bool HasOffset3fBug;
370371 bool HasFlatSegmentOffsetBug;
371372
372373 // Dummy feature to use for assembler in tablegen.
923924
924925 bool hasR128A16() const {
925926 return HasR128A16;
927 }
928
929 bool hasOffset3fBug() const { // accessor for the HasOffset3fBug member
930 return HasOffset3fBug;
926931 }
927932
928933 bool hasNSAEncoding() const {
3636 const MCSubtargetInfo *STI) const override;
3737 bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
3838 const MCRelaxableFragment *DF,
39 const MCAsmLayout &Layout) const override {
40 return false;
41 }
39 const MCAsmLayout &Layout) const override;
40
4241 void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
43 MCInst &Res) const override {
44 llvm_unreachable("Not implemented");
45 }
42 MCInst &Res) const override;
43
4644 bool mayNeedRelaxation(const MCInst &Inst,
47 const MCSubtargetInfo &STI) const override {
48 return false;
49 }
45 const MCSubtargetInfo &STI) const override;
5046
5147 unsigned getMinimumNopSize() const override;
5248 bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
5551 };
5652
5753 } //End anonymous namespace
54
55 void AMDGPUAsmBackend::relaxInstruction(const MCInst &Inst,
56 const MCSubtargetInfo &STI,
57 MCInst &Res) const {
58 // Build the relaxed form in Res: the opcode that getSOPPWithRelaxation maps
59 // Inst's opcode to, carrying over Inst's first operand.
60 Res.setOpcode(AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()));
61 Res.addOperand(Inst.getOperand(0));
62 }
63
64 bool AMDGPUAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
65 uint64_t Value,
66 const MCRelaxableFragment *DF,
67 const MCAsmLayout &Layout) const {
68 // Relax when (Value / 4) - 1 equals 0x3f, the branch offset hit by the
69 // gfx1010 hardware bug: the relaxed form adds an s_nop 0 immediately after
70 // the branch, which effectively increments the offset.
71 return (((int64_t(Value)/4)-1) == 0x3f);
72 }
73
74 bool AMDGPUAsmBackend::mayNeedRelaxation(const MCInst &Inst,
75 const MCSubtargetInfo &STI) const {
76 // Only subtargets that set FeatureOffset3fBug ever relax; for every other
77 // subtarget the answer is always false.
78 const bool HasBug = STI.getFeatureBits()[AMDGPU::FeatureOffset3fBug];
79
80 // An opcode is a relaxation candidate when getSOPPWithRelaxation yields a
81 // non-negative result for it.
82 return HasBug && AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()) >= 0;
83 }
5884
5985 static unsigned getFixupKindNumBytes(unsigned Kind) {
6086 switch (Kind) {
23082308 let ValueCols = [["0"]];
23092309 }
23102310
2311 // Maps a SOPP of Size 4 to the Size 8 SOPP with S_NOP that shares its AsmString
2312 def getSOPPWithRelaxation : InstrMapping {
2313 let FilterClass = "Base_SOPP"; // only instructions deriving from Base_SOPP take part
2314 let RowFields = ["AsmString"]; // instructions with the same AsmString form one row
2315 let ColFields = ["Size"]; // columns are told apart by the Size field
2316 let KeyCol = ["4"]; // key column: Size == 4
2317 let ValueCols = [["8"]]; // value column: Size == 8
2318 }
2319
23112320 include "SIInstructions.td"
23122321
23132322 include "DSInstructions.td"
921921 // SOPP Instructions
922922 //===----------------------------------------------------------------------===//
923923
924 class Base_SOPP <string asm> { // records the asm string under the AsmString field name
925 string AsmString = asm;
926 }
927
924928 class SOPPe op> : Enc32 {
925929 bits <16> simm16;
926930
930934 }
931935
932936 class SOPP op, dag ins, string asm, list pattern = []> :
933 InstSI <(outs), ins, asm, pattern >, SOPPe {
937 InstSI <(outs), ins, asm, pattern >, SOPPe , Base_SOPP {
934938
935939 let mayLoad = 0;
936940 let mayStore = 0;
943947 let UseNamedOperandTable = 1;
944948 }
945949
946
947950 def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
951
952 class SOPP_w_nop_e <bits<7> op> : Enc64 { // bits 31-0: the SOPP itself; bits 63-32: fields taken from S_NOP
953 bits <16> simm16;
954
955 let Inst{15-0} = simm16; // 16-bit immediate
956 let Inst{22-16} = op; // 7-bit opcode
957 let Inst{31-23} = 0x17f; // encoding
958 let Inst{47-32} = 0x0; // immediate field of the second 32-bit word, fixed to zero
959 let Inst{54-48} = S_NOP.Inst{22-16}; // opcode
960 let Inst{63-55} = S_NOP.Inst{31-23}; // encoding
961 }
962
963 class SOPP_w_nop <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
964 InstSI <(outs), ins, asm, pattern >, SOPP_w_nop_e <op>, Base_SOPP <asm> {
965
966 let mayLoad = 0;
967 let mayStore = 0;
968 let hasSideEffects = 0;
969 let SALU = 1;
970 let SOPP = 1;
971 let Size = 8; // Size value selected by the ValueCols of getSOPPWithRelaxation
972 let SchedRW = [WriteSALU];
973
974 let UseNamedOperandTable = 1;
975 }
976
977 multiclass SOPP_With_Relaxation <bits<7> op, dag ins, string asm, list<dag> pattern = []> {
978 def "" : SOPP <op, ins, asm, pattern>; // the plain SOPP, named after the defm itself
979 def _pad_s_nop : SOPP_w_nop <op, ins, asm, pattern>; // the SOPP_w_nop variant, suffixed _pad_s_nop
980 }
948981
949982 let isTerminator = 1 in {
950983
9751008 } // End SubtargetPredicate = isGFX10Plus
9761009
9771010 let isBranch = 1, SchedRW = [WriteBranch] in {
978 def S_BRANCH : SOPP <
1011 let isBarrier = 1 in {
1012 defm S_BRANCH : SOPP_With_Relaxation <
9791013 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
980 [(br bb:$simm16)]> {
981 let isBarrier = 1;
1014 [(br bb:$simm16)]>;
9821015 }
9831016
9841017 let Uses = [SCC] in {
985 def S_CBRANCH_SCC0 : SOPP <
1018 defm S_CBRANCH_SCC0 : SOPP_With_Relaxation <
9861019 0x00000004, (ins sopp_brtarget:$simm16),
9871020 "s_cbranch_scc0 $simm16"
9881021 >;
989 def S_CBRANCH_SCC1 : SOPP <
1022 defm S_CBRANCH_SCC1 : SOPP_With_Relaxation <
9901023 0x00000005, (ins sopp_brtarget:$simm16),
9911024 "s_cbranch_scc1 $simm16"
9921025 >;
9931026 } // End Uses = [SCC]
9941027
9951028 let Uses = [VCC] in {
996 def S_CBRANCH_VCCZ : SOPP <
1029 defm S_CBRANCH_VCCZ : SOPP_With_Relaxation <
9971030 0x00000006, (ins sopp_brtarget:$simm16),
9981031 "s_cbranch_vccz $simm16"
9991032 >;
1000 def S_CBRANCH_VCCNZ : SOPP <
1033 defm S_CBRANCH_VCCNZ : SOPP_With_Relaxation <
10011034 0x00000007, (ins sopp_brtarget:$simm16),
10021035 "s_cbranch_vccnz $simm16"
10031036 >;
10041037 } // End Uses = [VCC]
10051038
10061039 let Uses = [EXEC] in {
1007 def S_CBRANCH_EXECZ : SOPP <
1040 defm S_CBRANCH_EXECZ : SOPP_With_Relaxation <
10081041 0x00000008, (ins sopp_brtarget:$simm16),
10091042 "s_cbranch_execz $simm16"
10101043 >;
1011 def S_CBRANCH_EXECNZ : SOPP <
1044 defm S_CBRANCH_EXECNZ : SOPP_With_Relaxation <
10121045 0x00000009, (ins sopp_brtarget:$simm16),
10131046 "s_cbranch_execnz $simm16"
10141047 >;
10151048 } // End Uses = [EXEC]
10161049
1017 def S_CBRANCH_CDBGSYS : SOPP <
1050 defm S_CBRANCH_CDBGSYS : SOPP_With_Relaxation <
10181051 0x00000017, (ins sopp_brtarget:$simm16),
10191052 "s_cbranch_cdbgsys $simm16"
10201053 >;
10211054
1022 def S_CBRANCH_CDBGSYS_AND_USER : SOPP <
1055 defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_With_Relaxation <
10231056 0x0000001A, (ins sopp_brtarget:$simm16),
10241057 "s_cbranch_cdbgsys_and_user $simm16"
10251058 >;
10261059
1027 def S_CBRANCH_CDBGSYS_OR_USER : SOPP <
1060 defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_With_Relaxation <
10281061 0x00000019, (ins sopp_brtarget:$simm16),
10291062 "s_cbranch_cdbgsys_or_user $simm16"
10301063 >;
10311064
1032 def S_CBRANCH_CDBGUSER : SOPP <
1065 defm S_CBRANCH_CDBGUSER : SOPP_With_Relaxation <
10331066 0x00000018, (ins sopp_brtarget:$simm16),
10341067 "s_cbranch_cdbguser $simm16"
10351068 >;
189189 LLVM_READONLY
190190 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
191191
192 LLVM_READONLY
193 int getSOPPWithRelaxation(uint16_t Opcode);
194
192195 struct MIMGBaseOpcodeInfo {
193196 MIMGBaseOpcode BaseOpcode;
194197 bool Store;
0 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10
1 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -filetype=obj %s | llvm-objdump -disassemble -mcpu=gfx1010 - | FileCheck %s --check-prefix=BIN
2 s_getpc_b64 s[0:1]
3 s_cbranch_vccnz BB0_1
4 // GFX10: s_cbranch_vccnz BB0_1 ; encoding: [A,A,0x87,0xbf]
5 // GFX10-NEXT: ; fixup A - offset: 0, value: BB0_1, kind: fixup_si_sopp_br
6 // BIN: s_cbranch_vccnz BB0_1 // 000000000004: BF870040
7 s_nop 0
8 s_nop 0
9 s_nop 0
10 s_nop 0
11 s_nop 0
12 s_nop 0
13 s_nop 0
14 s_nop 0
15 s_nop 0
16 s_nop 0
17 s_nop 0
18 s_nop 0
19 s_nop 0
20 s_nop 0
21 s_nop 0
22 s_nop 0
23 s_nop 0
24 s_nop 0
25 s_nop 0
26 s_nop 0
27 s_nop 0
28 s_nop 0
29 s_nop 0
30 s_nop 0
31 s_nop 0
32 s_nop 0
33 s_nop 0
34 s_nop 0
35 s_nop 0
36 s_nop 0
37 s_nop 0
38 s_nop 0
39 s_nop 0
40 s_nop 0
41 s_nop 0
42 s_nop 0
43 s_nop 0
44 s_nop 0
45 s_nop 0
46 s_nop 0
47 s_nop 0
48 s_nop 0
49 s_nop 0
50 s_nop 0
51 s_nop 0
52 s_nop 0
53 s_nop 0
54 s_nop 0
55 s_nop 0
56 s_nop 0
57 s_nop 0
58 s_nop 0
59 s_nop 0
60 s_nop 0
61 s_nop 0
62 s_nop 0
63 s_nop 0
64 s_nop 0
65 s_nop 0
66 s_nop 0
67 s_nop 0
68 s_nop 0
69 s_nop 0
70 s_nop 0
71 BB0_1:
72 s_nop 0
73 s_endpgm
0 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10
1 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -filetype=obj %s | llvm-objdump -disassemble -mcpu=gfx1010 - | FileCheck %s --check-prefix=BIN
2 s_getpc_b64 s[0:1]
3 s_cbranch_vccnz BB0_1
4 // GFX10: s_cbranch_vccnz BB0_1 ; encoding: [A,A,0x87,0xbf]
5 // GFX10-NEXT: ; fixup A - offset: 0, value: BB0_1, kind: fixup_si_sopp_br
6 // BIN: s_cbranch_vccnz BB0_1 // 000000000004: BF870041
7 s_cbranch_execz BB0_3
8 // GFX10: s_cbranch_execz BB0_3 ; encoding: [A,A,0x88,0xbf]
9 // GFX10-NEXT: ; fixup A - offset: 0, value: BB0_3, kind: fixup_si_sopp_br
10 // BIN: s_cbranch_execz BB0_3 // 00000000000C: BF880040
11 s_nop 0
12 s_nop 0
13 s_nop 0
14 s_nop 0
15 s_nop 0
16 s_nop 0
17 s_nop 0
18 s_nop 0
19 s_nop 0
20 s_nop 0
21 s_nop 0
22 s_nop 0
23 s_nop 0
24 s_nop 0
25 s_nop 0
26 s_nop 0
27 s_nop 0
28 s_nop 0
29 s_nop 0
30 s_nop 0
31 s_nop 0
32 s_nop 0
33 s_nop 0
34 s_nop 0
35 s_nop 0
36 s_nop 0
37 s_nop 0
38 s_nop 0
39 s_nop 0
40 s_nop 0
41 s_nop 0
42 s_nop 0
43 s_nop 0
44 s_nop 0
45 s_nop 0
46 s_nop 0
47 s_nop 0
48 s_nop 0
49 s_nop 0
50 s_nop 0
51 s_nop 0
52 s_nop 0
53 s_nop 0
54 s_nop 0
55 s_nop 0
56 s_nop 0
57 s_nop 0
58 s_nop 0
59 s_nop 0
60 s_nop 0
61 s_nop 0
62 s_nop 0
63 s_nop 0
64 s_nop 0
65 s_nop 0
66 s_nop 0
67 s_nop 0
68 s_nop 0
69 s_nop 0
70 s_nop 0
71 s_nop 0
72 s_nop 0
73 BB0_1:
74 s_nop 0
75 BB0_3:
76 s_nop 0
77 s_endpgm
0 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10
1 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -filetype=obj %s | llvm-objdump -disassemble -mcpu=gfx1010 - | FileCheck %s --check-prefix=BIN
2 s_getpc_b64 s[0:1]
3 s_cbranch_vccnz BB0_2
4 // GFX10: s_cbranch_vccnz BB0_2 ; encoding: [A,A,0x87,0xbf]
5 // GFX10-NEXT: ; fixup A - offset: 0, value: BB0_2, kind: fixup_si_sopp_br
6 // BIN: s_cbranch_vccnz BB0_2 // 000000000004: BF870061
7 s_nop 0
8 s_nop 0
9 s_nop 0
10 s_nop 0
11 s_nop 0
12 s_nop 0
13 s_nop 0
14 s_nop 0
15 s_nop 0
16 s_nop 0
17 s_nop 0
18 s_nop 0
19 s_nop 0
20 s_nop 0
21 s_nop 0
22 s_nop 0
23 s_nop 0
24 s_nop 0
25 v_nop
26 s_nop 0
27 s_nop 0
28 s_nop 0
29 s_nop 0
30 s_cbranch_vccnz BB0_1
31 // GFX10: s_cbranch_vccnz BB0_1 ; encoding: [A,A,0x87,0xbf]
32 // GFX10-NEXT: ; fixup A - offset: 0, value: BB0_1, kind: fixup_si_sopp_br
33 // BIN: s_cbranch_vccnz BB0_1 // 000000000064: BF870041
34 s_nop 0
35 s_cbranch_execz BB0_3
36 // GFX10: s_cbranch_execz BB0_3 ; encoding: [A,A,0x88,0xbf]
37 // GFX10-NEXT: ; fixup A - offset: 0, value: BB0_3, kind: fixup_si_sopp_br
38 // BIN: s_cbranch_execz BB0_3 // 00000000006C: BF880040
39 s_nop 0
40 s_nop 0
41 s_nop 0
42 s_nop 0
43 s_nop 0
44 s_nop 0
45 s_nop 0
46 s_nop 0
47 s_nop 0
48 s_nop 0
49 s_nop 0
50 s_nop 0
51 s_nop 0
52 s_nop 0
53 s_nop 0
54 s_nop 0
55 s_nop 0
56 s_nop 0
57 s_nop 0
58 s_nop 0
59 s_nop 0
60 s_nop 0
61 s_nop 0
62 s_nop 0
63 s_nop 0
64 s_nop 0
65 s_nop 0
66 s_nop 0
67 s_nop 0
68 s_nop 0
69 s_nop 0
70 s_nop 0
71 s_nop 0
72 s_nop 0
73 s_nop 0
74 s_nop 0
75 s_nop 0
76 s_nop 0
77 s_nop 0
78 s_nop 0
79 s_nop 0
80 s_nop 0
81 s_nop 0
82 s_nop 0
83 s_nop 0
84 s_nop 0
85 s_nop 0
86 s_nop 0
87 s_nop 0
88 s_nop 0
89 s_nop 0
90 s_nop 0
91 s_nop 0
92 s_nop 0
93 s_nop 0
94 s_nop 0
95 s_nop 0
96 s_nop 0
97 s_nop 0
98 s_nop 0
99 s_nop 0
100 s_nop 0
101 BB0_1:
102 s_nop 0
103 BB0_3:
104 s_nop 0
105 s_nop 0
106 s_nop 0
107 s_cbranch_vccnz BB0_2
108 // GFX10: s_cbranch_vccnz BB0_2 ; encoding: [A,A,0x87,0xbf]
109 // GFX10-NEXT: ; fixup A - offset: 0, value: BB0_2, kind: fixup_si_sopp_br
110 // BIN: s_cbranch_vccnz BB0_2 // 00000000017C: BF870003
111 s_nop 0
112 s_nop 0
113 s_nop 0
114 BB0_2:
115 s_nop 0
116 s_nop 0
117 s_endpgm