llvm.org GIT mirror llvm / 7829a6d
AMDGPU: Add sram-ecc feature
Differential Revision: https://reviews.llvm.org/D53222
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346177 91177308-0d34-0410-b5e6-96231b3b80d8
Konstantin Zhuravlyov, 11 months ago
14 changed file(s) with 183 addition(s) and 45 deletion(s). Raw diff Collapse all Expand all
206206 names.
207207 ``gfx906`` ``amdgcn`` dGPU - xnack *TBA*
208208 [off]
209 sram-ecc
210 [on]
209211 .. TODO
210212 Add product
211213 names.
245247 .. table:: AMDGPU Target Features
246248 :name: amdgpu-target-feature-table
247249
248 ============== ==================================================
249 Target Feature Description
250 ============== ==================================================
251 -m[no-]xnack Enable/disable generating code that has
252 memory clauses that are compatible with
253 having XNACK replay enabled.
254
255 This is used for demand paging and page
256 migration. If XNACK replay is enabled in
257 the device, then if a page fault occurs
258 the code may execute incorrectly if the
259 ``xnack`` feature is not enabled. Executing
260 code that has the feature enabled on a
261 device that does not have XNACK replay
262 enabled will execute correctly, but may
263 be less performant than code with the
264 feature disabled.
265 ============== ==================================================
250 =============== ==================================================
251 Target Feature Description
252 =============== ==================================================
253 -m[no-]xnack Enable/disable generating code that has
254 memory clauses that are compatible with
255 having XNACK replay enabled.
256
257 This is used for demand paging and page
258 migration. If XNACK replay is enabled in
259 the device, then if a page fault occurs
260 the code may execute incorrectly if the
261 ``xnack`` feature is not enabled. Executing
262 code that has the feature enabled on a
263 device that does not have XNACK replay
264 enabled will execute correctly, but may
265 be less performant than code with the
266 feature disabled.
267 -m[no-]sram-ecc Enable/disable generating code that assumes SRAM
268 ECC is enabled/disabled.
269 =============== ==================================================
266270
267271 .. _amdgpu-address-spaces:
268272
544548 If the processor
545549 does not support the
546550 ``xnack`` target
551 feature then must
552 be 0.
553 See
554 :ref:`amdgpu-target-features`.
555 ``EF_AMDGPU_SRAM_ECC`` 0x00000200 Indicates if the ``sram-ecc``
556 target feature is
557 enabled for all code
558 contained in the code object.
559 If the processor
560 does not support the
561 ``sram-ecc`` target
547562 feature then must
548563 be 0.
549564 See
710710 EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
711711 EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
712712
713 // Indicates if the xnack target feature is enabled for all code contained in
714 // the object.
713 // Indicates if the "xnack" target feature is enabled for all code contained
714 // in the object.
715715 EF_AMDGPU_XNACK = 0x100,
716 // Indicates if the "sram-ecc" target feature is enabled for all code
717 // contained in the object.
718 EF_AMDGPU_SRAM_ECC = 0x200,
716719 };
717720
718721 // ELF Relocation types for AMDGPU
403403 BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
404404 BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
405405 BCase(EF_AMDGPU_XNACK);
406 BCase(EF_AMDGPU_SRAM_ECC);
406407 break;
407408 case ELF::EM_X86_64:
408409 break;
265265 "Has deep learning instructions"
266266 >;
267267
268 def FeatureD16PreservesUnusedBits : SubtargetFeature<
269 "d16-preserves-unused-bits",
270 "D16PreservesUnusedBits",
271 "true",
272 "If present, then instructions defined by HasD16LoadStore predicate preserve "
273 "unused bits. Otherwise instructions defined by HasD16LoadStore predicate "
274 "zero unused bits."
268 def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
269 "EnableSRAMECC",
270 "true",
271 "Enable SRAM ECC"
275272 >;
276273
277274 //===------------------------------------------------------------===//
523520 def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
524521 [FeatureGFX9,
525522 FeatureMadMixInsts,
526 FeatureLDSBankCount32,
527 FeatureD16PreservesUnusedBits]>;
523 FeatureLDSBankCount32]>;
528524
529525 def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
530526 [FeatureGFX9,
531527 FeatureMadMixInsts,
532528 FeatureLDSBankCount32,
533 FeatureXNACK,
534 FeatureD16PreservesUnusedBits]>;
529 FeatureXNACK]>;
535530
536531 def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
537532 [FeatureGFX9,
538533 FeatureLDSBankCount32,
539 FeatureFmaMixInsts,
540 FeatureD16PreservesUnusedBits]>;
534 FeatureFmaMixInsts]>;
541535
542536 def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
543537 [FeatureGFX9,
544538 HalfRate64Ops,
545539 FeatureFmaMixInsts,
546540 FeatureLDSBankCount32,
547 FeatureDLInsts]>;
541 FeatureDLInsts,
542 FeatureSRAMECC]>;
548543
549544 def FeatureISAVersion9_0_9 : SubtargetFeatureISAVersion <9,0,9,
550545 [FeatureGFX9,
551546 FeatureMadMixInsts,
552547 FeatureLDSBankCount32,
553 FeatureXNACK,
554 FeatureD16PreservesUnusedBits]>;
548 FeatureXNACK]>;
555549
556550 //===----------------------------------------------------------------------===//
557551 // Debugger related subtarget features.
683677 def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
684678 AssemblerPredicate<"!FeatureUnpackedD16VMem">;
685679
686 def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">,
687 AssemblerPredicate<"FeatureD16PreservesUnusedBits">;
680 def D16PreservesUnusedBits :
681 Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">,
682 AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
688683
689684 def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
690685 def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
197197 HasDPP(false),
198198 HasR128A16(false),
199199 HasDLInsts(false),
200 D16PreservesUnusedBits(false),
200 EnableSRAMECC(false),
201201 FlatAddressSpace(false),
202202 FlatInstOffsets(false),
203203 FlatGlobalInsts(false),
352352 bool HasDPP;
353353 bool HasR128A16;
354354 bool HasDLInsts;
355 bool D16PreservesUnusedBits;
355 bool EnableSRAMECC;
356356 bool FlatAddressSpace;
357357 bool FlatInstOffsets;
358358 bool FlatGlobalInsts;
678678 return HasDLInsts;
679679 }
680680
681 bool d16PreservesUnusedBits() const {
682 return D16PreservesUnusedBits;
681 bool isSRAMECCEnabled() const {
682 return EnableSRAMECC;
683683 }
684684
685685 // Scratch is allocated in 256 dword per wave blocks for the entire
346346 if (AMDGPU::hasXNACK(STI))
347347 EFlags |= ELF::EF_AMDGPU_XNACK;
348348
349 EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
350 if (AMDGPU::hasSRAMECC(STI))
351 EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
352
349353 MCA.setELFHeaderEFlags(EFlags);
350354 }
351355
151151
152152 if (hasXNACK(*STI))
153153 Stream << "+xnack";
154 if (hasSRAMECC(*STI))
155 Stream << "+sram-ecc";
154156
155157 Stream.flush();
156158 }
590592
591593 bool hasXNACK(const MCSubtargetInfo &STI) {
592594 return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
595 }
596
597 bool hasSRAMECC(const MCSubtargetInfo &STI) {
598 return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
593599 }
594600
595601 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
341341 }
342342
343343 bool hasXNACK(const MCSubtargetInfo &STI);
344 bool hasSRAMECC(const MCSubtargetInfo &STI);
344345 bool hasMIMG_R128(const MCSubtargetInfo &STI);
345346 bool hasPackedD16(const MCSubtargetInfo &STI);
346347
3333 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX900 %s
3434 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+code-object-v3,-xnack < %s | FileCheck --check-prefixes=NO-XNACK-GFX902 %s
3535
36 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX904 %s
37 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+code-object-v3,-sram-ecc < %s | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s
38
39 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX904 %s
40 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX906 %s
41
3642 ; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
3743 ; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
3844 ; GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
4753 ; GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
4854 ; GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack"
4955 ; GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904"
50 ; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
56 ; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc"
5157
5258 ; XNACK-GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack"
53 ; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902
59 ; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
60
61 ; SRAM-ECC-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+sram-ecc"
62 ; NO-SRAM-ECC-GFX906: "amdgcn-amd-amdhsa--gfx906"
63
64 ; SRAM-ECC-XNACK-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack+sram-ecc"
65 ; XNACK-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc"
5466
5567 define amdgpu_kernel void @directive_amdgcn_target() {
5668 ret void
8585 ; GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
8686 ; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E)
8787 ; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
88 ; GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
8889 ; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
8990 ; ALL: ]
9091
0 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s
1 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s
2 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX902 %s
3
4 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
5 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s
6 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
7 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc,+xnack < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s
8
9 ; NO-SRAM-ECC-GFX902: Flags [
10 ; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D)
11 ; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
12 ; NO-SRAM-ECC-GFX902-NEXT: ]
13
14 ; SRAM-ECC-GFX902: Flags [
15 ; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D)
16 ; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
17 ; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
18 ; SRAM-ECC-GFX902-NEXT: ]
19
20 ; NO-SRAM-ECC-GFX906: Flags [
21 ; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
22 ; NO-SRAM-ECC-GFX906-NEXT: ]
23
24 ; SRAM-ECC-GFX906: Flags [
25 ; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
26 ; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
27 ; SRAM-ECC-GFX906-NEXT: ]
28
29 ; SRAM-ECC-XNACK-GFX906: Flags [
30 ; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
31 ; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
32 ; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_XNACK (0x100)
33 ; SRAM-ECC-XNACK-GFX906-NEXT: ]
34
35 define amdgpu_kernel void @elf_header() {
36 ret void
37 }
0 # RUN: yaml2obj -docnum=1 %s > %t.o.1
1 # RUN: llvm-readobj -s -file-headers %t.o.1 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-NONE %s
2 # RUN: obj2yaml %t.o.1 | FileCheck --check-prefixes=YAML-SRAM-ECC-NONE %s
3 # RUN: yaml2obj -docnum=2 %s > %t.o.2
4 # RUN: llvm-readobj -s -file-headers %t.o.2 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-GFX900 %s
5 # RUN: obj2yaml %t.o.2 | FileCheck --check-prefixes=YAML-SRAM-ECC-GFX900 %s
6 # RUN: yaml2obj -docnum=3 %s > %t.o.3
7 # RUN: llvm-readobj -s -file-headers %t.o.3 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-XNACK-GFX900 %s
8 # RUN: obj2yaml %t.o.3 | FileCheck --check-prefixes=YAML-SRAM-ECC-XNACK-GFX900 %s
9
10 # ELF-SRAM-ECC-NONE: Flags [
11 # ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
12 # ELF-SRAM-ECC-NONE-NEXT: ]
13
14 # ELF-SRAM-ECC-GFX900: Flags [
15 # ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
16 # ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
17 # ELF-SRAM-ECC-GFX900-NEXT: ]
18
19 # ELF-SRAM-ECC-XNACK-GFX900: Flags [
20 # ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
21 # ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
22 # ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_XNACK (0x100)
23 # ELF-SRAM-ECC-XNACK-GFX900-NEXT: ]
24
25 # YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_SRAM_ECC ]
26 # YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
27 # YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
28
29 # Doc1
30 --- !ELF
31 FileHeader:
32 Class: ELFCLASS64
33 Data: ELFDATA2LSB
34 OSABI: ELFOSABI_NONE
35 Type: ET_REL
36 Machine: EM_AMDGPU
37 Flags: [ EF_AMDGPU_SRAM_ECC ]
38 ...
39
40 # Doc2
41 --- !ELF
42 FileHeader:
43 Class: ELFCLASS64
44 Data: ELFDATA2LSB
45 OSABI: ELFOSABI_NONE
46 Type: ET_REL
47 Machine: EM_AMDGPU
48 Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
49 ...
50
51 # Doc3
52 --- !ELF
53 FileHeader:
54 Class: ELFCLASS64
55 Data: ELFDATA2LSB
56 OSABI: ELFOSABI_NONE
57 Type: ET_REL
58 Machine: EM_AMDGPU
59 Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
60 ...
13541354 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
13551355 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
13561356 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
1357 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK)
1357 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
1358 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
13581359 };
13591360
13601361 static const EnumEntry ElfHeaderRISCVFlags[] = {