llvm.org GIT mirror llvm / f43d543
[AMDGPU] Add gfx1010 target definitions Differential Revision: https://reviews.llvm.org/D61041 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359113 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 1 year, 3 months ago
22 changed file(s) with 586 addition(s) and 134 deletion(s). Raw diff Collapse all Expand all
702702 EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
703703 EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
704704 EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
705 // AMDGCN GFX10.
706 EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
705707
706708 // Reserved for AMDGCN-based processors.
707709 EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
708710 EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x030,
711 EF_AMDGPU_MACH_AMDGCN_RESERVED2 = 0x032,
709712
710713 // First/last AMDGCN-based processors.
711714 EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
712 EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
715 EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1010,
713716
714717 // Indicates if the "xnack" target feature is enabled for all code contained
715718 // in the object.
8787 COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
8888 COMPUTE_PGM_RSRC1(BULKY, 24, 1),
8989 COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
90 COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
91 COMPUTE_PGM_RSRC1(RESERVED0, 27, 5),
90 COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
91 COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
92 COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
93 COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
94 COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
9295 };
9396 #undef COMPUTE_PGM_RSRC1
9497
118121 };
119122 #undef COMPUTE_PGM_RSRC2
120123
124 // Compute program resource register 3. Must match hardware definition.
// Each COMPUTE_PGM_RSRC3(NAME, SHIFT, WIDTH) line forwards to
// AMDHSA_BITS_ENUM_ENTRY with NAME prefixed by COMPUTE_PGM_RSRC3_. The two
// entries below use shift 0 / width 4 and shift 4 / width 28, so the widths
// add up to 32. The helper macro is #undef'd again right after the enum.
// NOTE(review): the expansion of AMDHSA_BITS_ENUM_ENTRY is not shown here --
// confirm which enumerators it actually emits.
125 #define COMPUTE_PGM_RSRC3(NAME, SHIFT, WIDTH) \
126 AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_ ## NAME, SHIFT, WIDTH)
127 enum : int32_t {
128 COMPUTE_PGM_RSRC3(SHARED_VGPR_COUNT, 0, 4), // GFX10+
129 COMPUTE_PGM_RSRC3(RESERVED0, 4, 28),
130 };
131 #undef COMPUTE_PGM_RSRC3
132
121133 // Kernel code properties. Must be kept backwards compatible.
122134 #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
123135 AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
129141 KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
130142 KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
131143 KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
132 KERNEL_CODE_PROPERTY(RESERVED0, 7, 9),
144 KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
145 KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
133146 };
134147 #undef KERNEL_CODE_PROPERTY
135148
139152 uint32_t private_segment_fixed_size;
140153 uint8_t reserved0[8];
141154 int64_t kernel_code_entry_byte_offset;
142 uint8_t reserved1[24];
155 uint8_t reserved1[20];
156 uint32_t compute_pgm_rsrc3; // GFX10+
143157 uint32_t compute_pgm_rsrc1;
144158 uint32_t compute_pgm_rsrc2;
145159 uint16_t kernel_code_properties;
165179 offsetof(kernel_descriptor_t, reserved1) == 24,
166180 "invalid offset for reserved1");
167181 static_assert(
182 offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44,
183 "invalid offset for compute_pgm_rsrc3");
184 static_assert(
168185 offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48,
169186 "invalid offset for compute_pgm_rsrc1");
170187 static_assert(
122122 GK_GFX906 = 63,
123123 GK_GFX909 = 65,
124124
125 GK_GFX1010 = 71,
126
125127 GK_AMDGCN_FIRST = GK_GFX600,
126 GK_AMDGCN_LAST = GK_GFX909,
128 GK_AMDGCN_LAST = GK_GFX1010,
127129 };
128130
129131 /// Instruction set architecture version.
410410 BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH);
411411 BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
412412 BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
413 BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
413414 BCase(EF_AMDGPU_XNACK);
414415 BCase(EF_AMDGPU_SRAM_ECC);
415416 break;
6161
6262 // This table should be sorted by the value of GPUKind
6363 // Don't bother listing the implicitly true features
64 constexpr GPUInfo AMDGCNGPUs[33] = {
64 constexpr GPUInfo AMDGCNGPUs[34] = {
6565 // Name Canonical Kind Features
6666 // Name
6767 {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
9797 {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
9898 {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
9999 {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
100 {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
100101 };
101102
102103 const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) {
178179 }
179180
180181 switch (AK) {
181 case GK_GFX600: return {6, 0, 0};
182 case GK_GFX601: return {6, 0, 1};
183 case GK_GFX700: return {7, 0, 0};
184 case GK_GFX701: return {7, 0, 1};
185 case GK_GFX702: return {7, 0, 2};
186 case GK_GFX703: return {7, 0, 3};
187 case GK_GFX704: return {7, 0, 4};
188 case GK_GFX801: return {8, 0, 1};
189 case GK_GFX802: return {8, 0, 2};
190 case GK_GFX803: return {8, 0, 3};
191 case GK_GFX810: return {8, 1, 0};
192 case GK_GFX900: return {9, 0, 0};
193 case GK_GFX902: return {9, 0, 2};
194 case GK_GFX904: return {9, 0, 4};
195 case GK_GFX906: return {9, 0, 6};
196 case GK_GFX909: return {9, 0, 9};
197 default: return {0, 0, 0};
198 }
199 }
182 case GK_GFX600: return {6, 0, 0};
183 case GK_GFX601: return {6, 0, 1};
184 case GK_GFX700: return {7, 0, 0};
185 case GK_GFX701: return {7, 0, 1};
186 case GK_GFX702: return {7, 0, 2};
187 case GK_GFX703: return {7, 0, 3};
188 case GK_GFX704: return {7, 0, 4};
189 case GK_GFX801: return {8, 0, 1};
190 case GK_GFX802: return {8, 0, 2};
191 case GK_GFX803: return {8, 0, 3};
192 case GK_GFX810: return {8, 1, 0};
193 case GK_GFX900: return {9, 0, 0};
194 case GK_GFX902: return {9, 0, 2};
195 case GK_GFX904: return {9, 0, 4};
196 case GK_GFX906: return {9, 0, 6};
197 case GK_GFX909: return {9, 0, 9};
198 case GK_GFX1010: return {10, 1, 0};
199 default: return {0, 0, 0};
200 }
201 }
5757 "FlatScratchInsts",
5858 "true",
5959 "Have scratch_* flat memory instructions"
60 >;
61
// Subtarget feature selected by the string "scalar-flat-scratch-insts". The
// remaining arguments name the subtarget attribute (ScalarFlatScratchInsts),
// the value it receives ("true") and the help text. It backs the
// HasScalarFlatScratchInsts assembler predicate and is listed in
// FeatureISAVersion10_1_0.
62 def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
63 "ScalarFlatScratchInsts",
64 "true",
65 "Have s_scratch_* flat memory instructions"
6066 >;
6167
6268 def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
114120 "Enable XNACK support"
115121 >;
116122
// Feature "cumode": sets the subtarget attribute EnableCuMode to "true".
// EnableCuMode is read by isCuModeEnabled() and also by hasLDSMisalignedBug(),
// which only reports the LDS bug while CU mode is off.
123 def FeatureCuMode : SubtargetFeature<"cumode",
124 "EnableCuMode",
125 "true",
126 "Enable CU wavefront execution mode"
127 >;
128
117129 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
118130 "SGPRInitBug",
119131 "true",
120132 "VI SGPR initialization bug requiring a fixed SGPR allocation size"
133 >;
134
// Feature "lds-misaligned-bug": sets the subtarget attribute LDSMisalignedBug.
// The accessor hasLDSMisalignedBug() additionally requires EnableCuMode to be
// false. This feature is listed directly in FeatureISAVersion10_1_0 rather
// than in FeatureGroup.GFX10_1_Bugs.
135 def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
136 "LDSMisalignedBug",
137 "true",
138 "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
139 >;
140
// Feature "vcmpx-permlane-hazard": sets HasVcmpxPermlaneHazard. Member of
// FeatureGroup.GFX10_1_Bugs. The help text is still a placeholder.
// NOTE(review): judging by the name this concerns V_CMPX followed by a
// permlane instruction -- confirm and replace the "TODO" description.
141 def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
142 "HasVcmpxPermlaneHazard",
143 "true",
144 "TODO: describe me"
145 >;
146
// Feature "vmem-to-scalar-write-hazard": sets HasVMEMtoScalarWriteHazard,
// exposed through hasVMEMtoScalarWriteHazard(). Member of
// FeatureGroup.GFX10_1_Bugs.
147 def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard",
148 "HasVMEMtoScalarWriteHazard",
149 "true",
150 "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution."
151 >;
152
// Feature "smem-to-vector-write-hazard": sets HasSMEMtoVectorWriteHazard,
// exposed through hasSMEMtoVectorWriteHazard(). Member of
// FeatureGroup.GFX10_1_Bugs.
153 def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard",
154 "HasSMEMtoVectorWriteHazard",
155 "true",
156 "s_load_dword followed by v_cmp page faults"
157 >;
158
// Feature "inst-fwd-prefetch-bug": sets HasInstFwdPrefetchBug, exposed through
// hasInstFwdPrefetchBug(). Member of FeatureGroup.GFX10_1_Bugs.
159 def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
160 "HasInstFwdPrefetchBug",
161 "true",
162 "S_INST_PREFETCH instruction causes shader to hang"
163 >;
164
// Feature "vcmpx-exec-war-hazard": sets HasVcmpxExecWARHazard, exposed through
// hasVcmpxExecWARHazard(). Member of FeatureGroup.GFX10_1_Bugs.
165 def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
166 "HasVcmpxExecWARHazard",
167 "true",
168 "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)"
169 >;
170
// Feature "lds-branch-vmem-war-hazard": sets HasLdsBranchVmemWARHazard, exposed
// through hasLdsBranchVmemWARHazard(). Member of FeatureGroup.GFX10_1_Bugs.
171 def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard",
172 "HasLdsBranchVmemWARHazard",
173 "true",
174 "Switching between LDS and VMEM-tex not waiting VM_VSRC=0"
175 >;
176
// Feature "nsa-to-vmem-bug": sets HasNSAtoVMEMBug, exposed through
// hasNSAtoVMEMBug(). Member of FeatureGroup.GFX10_1_Bugs.
177 def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
178 "HasNSAtoVMEMBug",
179 "true",
180 "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
181 >;
182
// Feature "flat-segment-offset-bug": sets HasFlatSegmentOffsetBug, exposed
// through hasFlatSegmentOffsetBug(). Last member of FeatureGroup.GFX10_1_Bugs.
183 def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
184 "HasFlatSegmentOffsetBug",
185 "true",
186 "GFX10 bug, inst_offset ignored in flat segment"
121187 >;
122188
123189 class SubtargetFeatureLDSBankCount : SubtargetFeature <
154220 "Additional instructions for GFX9+"
155221 >;
156222
// Feature "gfx10-insts": sets the subtarget attribute GFX10Insts. Assembler
// predicates key off it in both directions: isGFX10Plus and HasSDWA10 require
// it, while isGFX6GFX7, isGFX7Only and isGFX6GFX7GFX8GFX9 require its absence.
223 def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
224 "GFX10Insts",
225 "true",
226 "Additional instructions for GFX10+"
227 >;
228
157229 def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
158230 "GFX7GFX8GFX9Insts",
159231 "true",
256328 "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
257329 >;
258330
// Feature "nsa-encoding": sets HasNSAEncoding, exposed through
// hasNSAEncoding(). It is listed in FeatureISAVersion10_1_0, not in the
// FeatureGFX10 generation list.
331 def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
332 "HasNSAEncoding",
333 "true",
334 "Support NSA encoding for image instructions"
335 >;
336
259337 def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
260338 "HasIntClamp",
261339 "true",
296374 "EnableSRAMECC",
297375 "true",
298376 "Enable SRAM ECC"
377 >;
378
// Feature "no-sdst-cmpx": sets HasNoSdstCMPX. Part of the FeatureGFX10 list and
// the basis of the complementary predicates HasNoSdstCMPX / HasSdstCMPX.
379 def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
380 "HasNoSdstCMPX",
381 "true",
382 "V_CMPX does not write VCC/SGPR in addition to EXEC"
383 >;
384
// Feature "vscnt": sets HasVscnt, exposed through hasVscnt(). Part of the
// FeatureGFX10 list.
385 def FeatureVscnt : SubtargetFeature<"vscnt",
386 "HasVscnt",
387 "true",
388 "Has separate store vscnt counter"
389 >;
390
// Feature "register-banking": sets HasRegisterBanking, exposed through
// hasRegisterBanking(). Part of the FeatureGFX10 list.
391 def FeatureRegisterBanking : SubtargetFeature<"register-banking",
392 "HasRegisterBanking",
393 "true",
394 "Has register banking"
395 >;
396
// Feature "vop3-literal": sets HasVOP3Literal, exposed through
// hasVOP3Literal(). Part of the FeatureGFX10 list.
397 def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
398 "HasVOP3Literal",
399 "true",
400 "Can use one literal in VOP3"
401 >;
402
// Feature "no-data-dep-hazard": sets HasNoDataDepHazard, exposed through
// hasNoDataDepHazard(). Part of the FeatureGFX10 list.
403 def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
404 "HasNoDataDepHazard",
405 "true",
406 "Does not need SW waitstates"
407 >;
300408
301409 //===------------------------------------------------------------===//
486594 FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
487595 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
488596 FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
489 FeatureScalarAtomics, FeatureR128A16
597 FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
598 ]
599 >;
600
// Generation feature for GFX10, selected by the string "gfx10". The bracketed
// list is passed as the third argument; presumably these are the features the
// generation implies (GCNSubtargetFeatureGeneration is defined elsewhere --
// confirm). The list contains FeatureGFX10Insts and the new GFX10 features
// (NoSdstCMPX, Vscnt, RegisterBanking, VOP3Literal, NoDataDepHazard). It does
// not contain FeatureGCN3Encoding or FeatureGFX7GFX8GFX9Insts, which matches
// the GFX10 assembler predicates that test "!FeatureGCN3Encoding".
601 def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
602 "gfx10",
603 [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
604 FeatureFlatAddressSpace,
605 FeatureCIInsts, Feature16BitInsts,
606 FeatureSMemRealTime, FeatureInv2PiInlineImm,
607 FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
608 FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
609 FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
610 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
611 FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
612 FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
613 FeatureVOP3Literal, FeatureNoDataDepHazard,
614 FeatureDoesNotSupportSRAMECC
490615 ]
491616 >;
492617
599724 FeatureLDSBankCount32,
600725 FeatureXNACK,
601726 FeatureCodeObjectV3]>;
727
728 // TODO: Organize more features into groups.
// Record holding named lists of features. It currently has one field,
// GFX10_1_Bugs, which FeatureISAVersion10_1_0 prepends to its own feature list
// via !listconcat. FeatureLdsMisalignedBug is not part of this list.
729 def FeatureGroup {
730 // Bugs present on gfx10.1.
731 list GFX10_1_Bugs = [
732 FeatureVcmpxPermlaneHazard,
733 FeatureVMEMtoScalarWriteHazard,
734 FeatureSMEMtoVectorWriteHazard,
735 FeatureInstFwdPrefetchBug,
736 FeatureVcmpxExecWARHazard,
737 FeatureLdsBranchVmemWARHazard,
738 FeatureNSAtoVMEMBug,
739 FeatureFlatSegmentOffsetBug
740 ];
741 }
742
// Feature set for ISA version 10.1.0: the GFX10_1_Bugs list concatenated with
// the GFX10 generation feature and the per-ISA features below. Its .Features
// field is what the "gfx1010" ProcessorModel references.
743 def FeatureISAVersion10_1_0 : FeatureSet<
744 !listconcat(FeatureGroup.GFX10_1_Bugs,
745 [FeatureGFX10,
746 FeatureLDSBankCount32,
747 FeatureDLInsts,
748 FeatureNSAEncoding,
749 FeatureWavefrontSize64,
750 FeatureScalarStores,
751 FeatureScalarAtomics,
752 FeatureScalarFlatScratchInsts,
753 FeatureLdsMisalignedBug,
754 FeatureCodeObjectV3])>;
602755
603756 //===----------------------------------------------------------------------===//
604757
// Codegen predicate: generation is SOUTHERN_ISLANDS or SEA_ISLANDS.
// Assembler predicate: neither GCN3 encoding nor GFX10 instructions. The
// "!FeatureGFX10Insts" term is what separates this from isGFX6GFX7GFX10, which
// tests only "!FeatureGCN3Encoding".
686839 def isGFX6GFX7 :
687840 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
688841 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
842 AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;
843
// Codegen predicate: generation is SOUTHERN_ISLANDS, SEA_ISLANDS or GFX10.
// Assembler predicate: GCN3 encoding is absent (no GFX10 condition).
844 def isGFX6GFX7GFX10 :
845 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
846 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
847 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
689848 AssemblerPredicate<"!FeatureGCN3Encoding">;
690849
// Codegen predicate: generation is exactly SEA_ISLANDS.
// Assembler predicate: CI instructions present, GCN3 encoding and GFX10
// instructions both absent.
691850 def isGFX7Only :
692851 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
852 AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;
853
// Codegen predicate: generation is SEA_ISLANDS or GFX10.
// Assembler predicate: CI instructions present and GCN3 encoding absent; unlike
// isGFX7Only there is no "!FeatureGFX10Insts" term.
854 def isGFX7GFX10 :
855 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
856 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
693857 AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;
694858
695859 def isGFX7GFX8GFX9 :
698862 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
699863 AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;
700864
// Codegen predicate: generation is one of SOUTHERN_ISLANDS, SEA_ISLANDS,
// VOLCANIC_ISLANDS or GFX9. The assembler side expresses the same set purely
// as "GFX10 instructions absent".
865 def isGFX6GFX7GFX8GFX9 :
866 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
867 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
868 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
869 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
870 AssemblerPredicate<"!FeatureGFX10Insts">;
871
701872 def isGFX7Plus :
702873 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
703874 AssemblerPredicate<"FeatureCIInsts">;
723894 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
724895 AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;
725896
// Codegen predicate: generation is GFX10 or later (>= comparison).
// Assembler predicate: FeatureGFX10Insts is set.
897 def isGFX10Plus :
898 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
899 AssemblerPredicate<"FeatureGFX10Insts">;
900
726901 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
727902 AssemblerPredicate<"FeatureFlatAddressSpace">;
728903
730905 AssemblerPredicate<"FeatureFlatGlobalInsts">;
731906 def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
732907 AssemblerPredicate<"FeatureFlatScratchInsts">;
// Codegen side calls Subtarget->hasScalarFlatScratchInsts(); assembler side
// requires FeatureScalarFlatScratchInsts.
908 def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
909 AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
733910 def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
734911 AssemblerPredicate<"FeatureGFX9Insts">;
735912
765942 Predicate<"Subtarget->hasSDWA()">,
766943 AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
767944
// SDWA predicate for GFX10. The codegen condition is Subtarget->hasSDWA(), the
// same call the preceding SDWA predicate uses. The assembler condition differs:
// GCN3 encoding must be absent, and GFX10 instructions plus FeatureSDWA present.
945 def HasSDWA10 :
946 Predicate<"Subtarget->hasSDWA()">,
947 AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
948
768949 def HasDPP : Predicate<"Subtarget->hasDPP()">,
769950 AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
770951
777958 def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
778959 AssemblerPredicate<"FeatureMadMixInsts">;
779960
// Codegen side calls Subtarget->hasScalarStores(); assembler side requires
// FeatureScalarStores.
961 def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
962 AssemblerPredicate<"FeatureScalarStores">;
963
780964 def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
781965 AssemblerPredicate<"FeatureScalarAtomics">;
966
// True when the subtarget reports hasNoSdstCMPX(); assembler side requires
// FeatureNoSdstCMPX. HasSdstCMPX is the exact complement.
967 def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
968 AssemblerPredicate<"FeatureNoSdstCMPX">;
969
// Complement of HasNoSdstCMPX: negates hasNoSdstCMPX() on the codegen side and
// requires FeatureNoSdstCMPX to be absent on the assembler side.
970 def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
971 AssemblerPredicate<"!FeatureNoSdstCMPX">;
782972
783973 def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
784974 def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
180180
181181 HasApertureRegs(false),
182182 EnableXNACK(false),
183 EnableCuMode(false),
183184 TrapHandler(false),
184185
185186 EnableHugePrivateBuffer(false),
195196 CIInsts(false),
196197 GFX8Insts(false),
197198 GFX9Insts(false),
199 GFX10Insts(false),
198200 GFX7GFX8GFX9Insts(false),
199201 SGPRInitBug(false),
200202 HasSMemRealTime(false),
211213 HasSDWAOutModsVOPC(false),
212214 HasDPP(false),
213215 HasR128A16(false),
216 HasNSAEncoding(false),
214217 HasDLInsts(false),
215218 HasDot1Insts(false),
216219 HasDot2Insts(false),
217220 EnableSRAMECC(false),
218221 DoesNotSupportSRAMECC(false),
222 HasNoSdstCMPX(false),
223 HasVscnt(false),
224 HasRegisterBanking(false),
225 HasVOP3Literal(false),
226 HasNoDataDepHazard(false),
219227 FlatAddressSpace(false),
220228 FlatInstOffsets(false),
221229 FlatGlobalInsts(false),
222230 FlatScratchInsts(false),
231 ScalarFlatScratchInsts(false),
223232 AddNoCarryInsts(false),
224233 HasUnpackedD16VMem(false),
234 LDSMisalignedBug(false),
225235
226236 ScalarizeGlobal(false),
237
238 HasVcmpxPermlaneHazard(false),
239 HasVMEMtoScalarWriteHazard(false),
240 HasSMEMtoVectorWriteHazard(false),
241 HasInstFwdPrefetchBug(false),
242 HasVcmpxExecWARHazard(false),
243 HasLdsBranchVmemWARHazard(false),
244 HasNSAtoVMEMBug(false),
245 HasFlatSegmentOffsetBug(false),
227246
228247 FeatureDisable(false),
229248 InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
242261 return getLocalMemorySize();
243262 unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
244263 unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
264 if (!WorkGroupsPerCu)
265 return 0;
245266 unsigned MaxWaves = getMaxWavesPerEU();
246267 return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
247268 }
250271 const Function &F) const {
251272 unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
252273 unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
274 if (!WorkGroupsPerCu)
275 return 0;
253276 unsigned MaxWaves = getMaxWavesPerEU();
254277 unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
255278 unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
270293 case CallingConv::AMDGPU_CS:
271294 case CallingConv::AMDGPU_KERNEL:
272295 case CallingConv::SPIR_KERNEL:
273 return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
296 return std::make_pair(getWavefrontSize() * 2,
297 std::max(getWavefrontSize() * 4, 256u));
274298 case CallingConv::AMDGPU_VS:
275299 case CallingConv::AMDGPU_LS:
276300 case CallingConv::AMDGPU_HS:
495519 Policy.ShouldTrackLaneMasks = true;
496520 }
497521
// Returns true when InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) yields
// anything other than -1.
// NOTE(review): presumably -1 means the pseudo has no real encoding on this
// subtarget -- confirm against pseudoToMCOpcode.
522 bool GCNSubtarget::hasMadF16() const {
523 return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
524 }
525
498526 unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
527 if (getGeneration() >= AMDGPUSubtarget::GFX10)
528 return 10;
529
499530 if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
500531 if (SGPRs <= 80)
501532 return 10;
542573
543574 unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
544575 const SIMachineFunctionInfo &MFI = *MF.getInfo();
576 if (getGeneration() >= AMDGPUSubtarget::GFX10)
577 return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
578
545579 if (MFI.hasFlatScratchInit()) {
546580 if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
547581 return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
5454 SOUTHERN_ISLANDS = 4,
5555 SEA_ISLANDS = 5,
5656 VOLCANIC_ISLANDS = 6,
57 GFX9 = 7
57 GFX9 = 7,
58 GFX10 = 8
5859 };
5960
6061 private:
292293 bool UnalignedBufferAccess;
293294 bool HasApertureRegs;
294295 bool EnableXNACK;
296 bool EnableCuMode;
295297 bool TrapHandler;
296298
297299 // Used as options.
312314 bool CIInsts;
313315 bool GFX8Insts;
314316 bool GFX9Insts;
317 bool GFX10Insts;
315318 bool GFX7GFX8GFX9Insts;
316319 bool SGPRInitBug;
317320 bool HasSMemRealTime;
328331 bool HasSDWAOutModsVOPC;
329332 bool HasDPP;
330333 bool HasR128A16;
334 bool HasNSAEncoding;
331335 bool HasDLInsts;
332336 bool HasDot1Insts;
333337 bool HasDot2Insts;
334338 bool EnableSRAMECC;
335339 bool DoesNotSupportSRAMECC;
340 bool HasNoSdstCMPX;
341 bool HasVscnt;
342 bool HasRegisterBanking;
343 bool HasVOP3Literal;
344 bool HasNoDataDepHazard;
336345 bool FlatAddressSpace;
337346 bool FlatInstOffsets;
338347 bool FlatGlobalInsts;
339348 bool FlatScratchInsts;
349 bool ScalarFlatScratchInsts;
340350 bool AddNoCarryInsts;
341351 bool HasUnpackedD16VMem;
342352 bool R600ALUInst;
343353 bool CaymanISA;
344354 bool CFALUBug;
355 bool LDSMisalignedBug;
345356 bool HasVertexCache;
346357 short TexVTXClauseSize;
347358 bool ScalarizeGlobal;
359
360 bool HasVcmpxPermlaneHazard;
361 bool HasVMEMtoScalarWriteHazard;
362 bool HasSMEMtoVectorWriteHazard;
363 bool HasInstFwdPrefetchBug;
364 bool HasVcmpxExecWARHazard;
365 bool HasLdsBranchVmemWARHazard;
366 bool HasNSAtoVMEMBug;
367 bool HasFlatSegmentOffsetBug;
348368
349369 // Dummy feature to use for assembler in tablegen.
350370 bool FeatureDisable;
582602 return EnableXNACK;
583603 }
584604
// Returns the EnableCuMode flag (initialised to false in the constructor; the
// "cumode" feature names this attribute).
605 bool isCuModeEnabled() const {
606 return EnableCuMode;
607 }
608
585609 bool hasFlatAddressSpace() const {
586610 return FlatAddressSpace;
587611 }
596620
597621 bool hasFlatScratchInsts() const {
598622 return FlatScratchInsts;
623 }
624
// Returns the ScalarFlatScratchInsts flag; used by the
// HasScalarFlatScratchInsts predicate.
625 bool hasScalarFlatScratchInsts() const {
626 return ScalarFlatScratchInsts;
627 }
628
// Returns the HasFlatSegmentOffsetBug flag (attribute of the
// "flat-segment-offset-bug" feature).
629 bool hasFlatSegmentOffsetBug() const {
630 return HasFlatSegmentOffsetBug;
599631 }
600632
601633 bool hasFlatLgkmVMemCountInOrder() const {
653685 return HasSDWAOutModsVOPC;
654686 }
655687
// Returns the HasDLInsts flag. In this diff the getter is added here (new line
// 688); an identical copy at old line 660 is shown removed further down.
688 bool hasDLInsts() const {
689 return HasDLInsts;
690 }
691
// Returns the HasDot1Insts flag. Added here; an identical copy at old line 664
// is shown removed further down.
692 bool hasDot1Insts() const {
693 return HasDot1Insts;
694 }
695
// Returns the HasDot2Insts flag. Added here; an identical copy at old line 668
// is shown removed further down.
696 bool hasDot2Insts() const {
697 return HasDot2Insts;
698 }
699
// Returns the EnableSRAMECC flag. Added here; an identical copy at old line 672
// is shown removed further down.
700 bool isSRAMECCEnabled() const {
701 return EnableSRAMECC;
702 }
703
// Returns the HasNoSdstCMPX flag; queried by the HasNoSdstCMPX / HasSdstCMPX
// predicates.
704 bool hasNoSdstCMPX() const {
705 return HasNoSdstCMPX;
706 }
707
// Returns the HasVscnt flag (attribute of the "vscnt" feature).
708 bool hasVscnt() const {
709 return HasVscnt;
710 }
711
// Returns the HasRegisterBanking flag (attribute of the "register-banking"
// feature).
712 bool hasRegisterBanking() const {
713 return HasRegisterBanking;
714 }
715
// Returns the HasVOP3Literal flag (attribute of the "vop3-literal" feature).
716 bool hasVOP3Literal() const {
717 return HasVOP3Literal;
718 }
719
// Returns the HasNoDataDepHazard flag (attribute of the "no-data-dep-hazard"
// feature).
720 bool hasNoDataDepHazard() const {
721 return HasNoDataDepHazard;
722 }
723
656724 bool vmemWriteNeedsExpWaitcnt() const {
657725 return getGeneration() < SEA_ISLANDS;
658 }
659
660 bool hasDLInsts() const {
661 return HasDLInsts;
662 }
663
664 bool hasDot1Insts() const {
665 return HasDot1Insts;
666 }
667
668 bool hasDot2Insts() const {
669 return HasDot2Insts;
670 }
671
672 bool isSRAMECCEnabled() const {
673 return EnableSRAMECC;
674726 }
675727
676728 // Scratch is allocated in 256 dword per wave blocks for the entire
781833 return HasR128A16;
782834 }
783835
// Returns the HasNSAEncoding flag (attribute of the "nsa-encoding" feature).
836 bool hasNSAEncoding() const {
837 return HasNSAEncoding;
838 }
839
840 bool hasMadF16() const;
841
784842 bool enableSIScheduler() const {
785843 return EnableSIScheduler;
786844 }
813871 bool hasReadM0SendMsgHazard() const {
814872 return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
815873 getGeneration() <= AMDGPUSubtarget::GFX9;
874 }
875
// Returns the HasVcmpxPermlaneHazard flag (attribute of the
// "vcmpx-permlane-hazard" feature).
876 bool hasVcmpxPermlaneHazard() const {
877 return HasVcmpxPermlaneHazard;
878 }
879
// Returns the HasVMEMtoScalarWriteHazard flag (attribute of the
// "vmem-to-scalar-write-hazard" feature).
880 bool hasVMEMtoScalarWriteHazard() const {
881 return HasVMEMtoScalarWriteHazard;
882 }
883
// Returns the HasSMEMtoVectorWriteHazard flag (attribute of the
// "smem-to-vector-write-hazard" feature).
884 bool hasSMEMtoVectorWriteHazard() const {
885 return HasSMEMtoVectorWriteHazard;
886 }
887
// Unlike the plain flag getters around it, this combines two flags: the bug is
// reported only when LDSMisalignedBug is set AND CU mode is not enabled.
888 bool hasLDSMisalignedBug() const {
889 return LDSMisalignedBug && !EnableCuMode;
890 }
891
// Returns the HasInstFwdPrefetchBug flag (attribute of the
// "inst-fwd-prefetch-bug" feature).
892 bool hasInstFwdPrefetchBug() const {
893 return HasInstFwdPrefetchBug;
894 }
895
// Returns the HasVcmpxExecWARHazard flag (attribute of the
// "vcmpx-exec-war-hazard" feature).
896 bool hasVcmpxExecWARHazard() const {
897 return HasVcmpxExecWARHazard;
898 }
899
// Returns the HasLdsBranchVmemWARHazard flag (attribute of the
// "lds-branch-vmem-war-hazard" feature).
900 bool hasLdsBranchVmemWARHazard() const {
901 return HasLdsBranchVmemWARHazard;
902 }
903
// Returns the HasNSAtoVMEMBug flag (attribute of the "nsa-to-vmem-bug"
// feature).
904 bool hasNSAtoVMEMBug() const {
905 return HasNSAtoVMEMBug;
816906 }
817907
818908 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
998998 return AMDGPU::isGFX9(getSTI());
999999 }
10001000
// Assembler-parser convenience wrapper: forwards to AMDGPU::isGFX10 with this
// parser's subtarget info, mirroring the isGFX9() helper just above it.
1001 bool isGFX10() const {
1002 return AMDGPU::isGFX10(getSTI());
1003 }
1004
10011005 bool hasInv2PiInlineImm() const {
10021006 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
10031007 }
14061410 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
14071411 if (AsmParser->isVI())
14081412 return isVReg32();
1409 else if (AsmParser->isGFX9())
1413 else if (AsmParser->isGFX9() || AsmParser->isGFX10())
14101414 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
14111415 else
14121416 return false;
29522956 if (getParser().parseIdentifier(KernelName))
29532957 return true;
29542958
2955 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2959 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
29562960
29572961 StringSet<> Seen;
29582962
163163 FeatureISAVersion9_0_9.Features
164164 >;
165165
166 //===----------------------------------------------------------------------===//
167 // GCN GFX10.
168 //===----------------------------------------------------------------------===//
169
// Anonymous processor definition for the CPU name "gfx1010": pairs the
// GFX10SpeedModel scheduling model with the FeatureISAVersion10_1_0 features.
170 def : ProcessorModel<"gfx1010", GFX10SpeedModel,
171 FeatureISAVersion10_1_0.Features
172 >;
5959 AMDGPU::GPUKind AK;
6060
6161 switch (ElfMach) {
62 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
63 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
64 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
65 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
66 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
67 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
68 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
69 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
70 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
71 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
72 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
73 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
74 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
75 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
76 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
77 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
78 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
79 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
80 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
81 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
82 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
94 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
62 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
63 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
64 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
65 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
66 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
67 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
68 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
69 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
70 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
71 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
72 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
73 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
74 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
75 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
76 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
77 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
78 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
79 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
80 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
81 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
82 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
95 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
9596 }
9697
9798 StringRef GPUName = getArchNameAMDGCN(AK);
138139 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
139140 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
140141 case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
142 case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
141143 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
142144 }
143145
323325 PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
324326 compute_pgm_rsrc1,
325327 amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
328 if (IVersion.Major >= 10) {
329 PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
330 compute_pgm_rsrc1,
331 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
332 PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
333 compute_pgm_rsrc1,
334 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
335 PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
336 compute_pgm_rsrc1,
337 amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
338 }
326339 PRINT_FIELD(
327340 OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
328341 compute_pgm_rsrc2,
522522 #define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
523523 #define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
524524 #define C_00B848_IEEE_MODE 0xFF7FFFFF
// Single-bit field helpers at bits 29, 30 and 31, following the IEEE_MODE
// triple above: S_ masks x to one bit and shifts it into place, G_ extracts the
// bit, and C_ is the 32-bit constant with only that bit cleared. The bit
// positions equal the COMPUTE_PGM_RSRC1 shifts for WGP_MODE, MEM_ORDERED and
// FWD_PROGRESS.
// NOTE(review): S_00B848_FWD_PROGRESS shifts into bit 31; if x has a signed
// int type the result lands in the sign bit -- confirm callers pass unsigned.
525 #define S_00B848_WGP_MODE(x) (((x) & 0x1) << 29)
526 #define G_00B848_WGP_MODE(x) (((x) >> 29) & 0x1)
527 #define C_00B848_WGP_MODE 0xDFFFFFFF
528 #define S_00B848_MEM_ORDERED(x) (((x) & 0x1) << 30)
529 #define G_00B848_MEM_ORDERED(x) (((x) >> 30) & 0x1)
530 #define C_00B848_MEM_ORDERED 0xBFFFFFFF
531 #define S_00B848_FWD_PROGRESS(x) (((x) & 0x1) << 31)
532 #define G_00B848_FWD_PROGRESS(x) (((x) >> 31) & 0x1)
533 #define C_00B848_FWD_PROGRESS 0x7FFFFFFF
525534
526535
527536 // Helpers for setting FLOAT_MODE
552561 #define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
553562 #define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
554563
// Each R_* macro equals the hexadecimal number embedded in its name; each S_*
// macro masks x to one bit and shifts it to the *_W32_EN position: bits 21, 22
// and 23 for HS/GS/VS, bit 15 for PS and bit 15 for CS. Only setters are
// defined here, with no matching G_/C_ helpers.
// NOTE(review): presumably the R_* values are register offsets and W32_EN
// selects wave32 for that stage -- confirm against the hardware register docs.
564 #define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
565 #define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)
566 #define S_028B54_GS_W32_EN(x) (((x) & 0x1) << 22)
567 #define S_028B54_VS_W32_EN(x) (((x) & 0x1) << 23)
568 #define R_0286D8_SPI_PS_IN_CONTROL 0x0286D8
569 #define S_0286D8_PS_W32_EN(x) (((x) & 0x1) << 15)
570 #define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800
571 #define S_00B800_CS_W32_EN(x) (((x) & 0x1) << 15)
572
555573 #define R_SPILLED_SGPRS 0x4
556574 #define R_SPILLED_VGPRS 0x8
557575 } // End namespace llvm
55905590 SDWA = 2,
55915591 SDWA9 = 3,
55925592 GFX80 = 4,
5593 GFX9 = 5
5593 GFX9 = 5,
5594 GFX10 = 6,
5595 SDWA10 = 7
55945596 };
55955597
55965598 static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
56035605 case AMDGPUSubtarget::VOLCANIC_ISLANDS:
56045606 case AMDGPUSubtarget::GFX9:
56055607 return SIEncodingFamily::VI;
5608 case AMDGPUSubtarget::GFX10:
5609 return SIEncodingFamily::GFX10;
56065610 }
56075611 llvm_unreachable("Unknown subtarget generation!");
56085612 }
2222 int SDWA9 = 3;
2323 int GFX80 = 4;
2424 int GFX9 = 5;
25 int GFX10 = 6;
26 int SDWA10 = 7;
2527 }
2628
2729 //===----------------------------------------------------------------------===//
111111 }
112112
113113 foreach Index = 0-15 in {
114 def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
115 def TTMP#Index#_gfx9 : SIReg<"ttmp"#Index, !add(108, Index)>;
116 def TTMP#Index : SIReg<"", 0>;
114 def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
115 def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
116 def TTMP#Index : SIReg<"", 0>;
117117 }
118118
119119 multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
310310 getSubRegs<size>.ret>;
311311
312312 foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
313 def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
314 def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
313 def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
314 def TTMP#Index#_TTMP#!add(Index,1)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 2, Index>;
315315 }
316316
317317 foreach Index = {0, 4, 8, 12} in {
320320 _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>;
321321 def TTMP#Index#_TTMP#!add(Index,1)#
322322 _TTMP#!add(Index,2)#
323 _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
323 _TTMP#!add(Index,3)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 4, Index>;
324324 }
325325
326326 foreach Index = {0, 4, 8} in {
337337 _TTMP#!add(Index,4)#
338338 _TTMP#!add(Index,5)#
339339 _TTMP#!add(Index,6)#
340 _TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
340 _TTMP#!add(Index,7)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 8, Index>;
341341 }
342342
343343 def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
347347 TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
348348 TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
349349
350 def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
350 def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9_gfx10 :
351351 TmpRegTuplesBase<0, 16,
352 [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
353 TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
354 TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
355 TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;
352 [TTMP0_gfx9_gfx10, TTMP1_gfx9_gfx10, TTMP2_gfx9_gfx10, TTMP3_gfx9_gfx10,
353 TTMP4_gfx9_gfx10, TTMP5_gfx9_gfx10, TTMP6_gfx9_gfx10, TTMP7_gfx9_gfx10,
354 TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10,
355 TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>;
356356
357357
358358 // VGPR 32-bit registers
3636 // half rate f64 instruction (same as v_add_f64)
3737 def WriteDoubleAdd : SchedWrite;
3838
39 // Conversion to or from f64 instruction
40 def WriteDoubleCvt : SchedWrite;
41
3942 // Half rate 64-bit instructions.
4043 def Write64Bit : SchedWrite;
4144
6063
6164 def SIFullSpeedModel : SISchedMachineModel;
6265 def SIQuarterSpeedModel : SISchedMachineModel;
66 def GFX10SpeedModel : SISchedMachineModel;
6367
6468 // XXX: Are the resource counts correct?
6569 def HWBranch : ProcResource<1> {
7882 let BufferSize = 15; // Taken from S_WAITCNT
7983 }
8084 def HWVALU : ProcResource<1> {
85 let BufferSize = 1;
86 }
87 def HWRC : ProcResource<1> { // Register destination cache
8188 let BufferSize = 1;
8289 }
8390
123130 def : HWVALUWriteRes;
124131 def : HWVALUWriteRes;
125132 def : HWVALUWriteRes;
133 def : HWVALUWriteRes;
126134
127135 def : InstRW<[WriteCopy], (instrs COPY)>;
128136
135143 def : HWVALUWriteRes;
136144 def : HWVALUWriteRes;
137145 def : HWVALUWriteRes;
146 def : HWVALUWriteRes;
138147
139148 def : InstRW<[WriteCopy], (instrs COPY)>;
140149
141150 } // End SchedModel = SIQuarterSpeedModel
151
152 let SchedModel = GFX10SpeedModel in {
153
154 // The latency values are 1 / (operations / cycle).
155 // Add 1 stall cycle for VGPR read.
156 def : HWWriteRes;
157 def : HWWriteRes;
158 def : HWWriteRes;
159 def : HWWriteRes;
160 def : HWWriteRes;
161 def : HWWriteRes;
162 def : HWWriteRes;
163
164 def : HWWriteRes;
165 def : HWWriteRes;
166 def : HWWriteRes;
167 def : HWWriteRes;
168 def : HWWriteRes;
169 def : HWWriteRes;
170 def : HWWriteRes;
171
172 def : InstRW<[WriteCopy], (instrs COPY)>;
173
174 } // End SchedModel = GFX10SpeedModel
434434 Header.kernarg_segment_alignment = 4;
435435 Header.group_segment_alignment = 4;
436436 Header.private_segment_alignment = 4;
437 }
438
439 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
437
438 if (Version.Major >= 10) {
439 Header.compute_pgm_resource_registers |=
440 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
441 S_00B848_MEM_ORDERED(1);
442 }
443 }
444
445 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
446 const MCSubtargetInfo *STI) {
447 IsaVersion Version = getIsaVersion(STI->getCPU());
448
440449 amdhsa::kernel_descriptor_t KD;
441450 memset(&KD, 0, sizeof(KD));
451
442452 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
443453 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
444454 amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
448458 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
449459 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
450460 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
461 if (Version.Major >= 10) {
462 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
463 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
464 STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
465 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
466 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
467 }
451468 return KD;
452469 }
453470
678695 return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
679696 }
680697
698 bool isGFX10(const MCSubtargetInfo &STI) {
699 return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
700 }
701
681702 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
682703 return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
683704 }
703724 CASE_CI_VI(FLAT_SCR) \
704725 CASE_CI_VI(FLAT_SCR_LO) \
705726 CASE_CI_VI(FLAT_SCR_HI) \
706 CASE_VI_GFX9(TTMP0) \
707 CASE_VI_GFX9(TTMP1) \
708 CASE_VI_GFX9(TTMP2) \
709 CASE_VI_GFX9(TTMP3) \
710 CASE_VI_GFX9(TTMP4) \
711 CASE_VI_GFX9(TTMP5) \
712 CASE_VI_GFX9(TTMP6) \
713 CASE_VI_GFX9(TTMP7) \
714 CASE_VI_GFX9(TTMP8) \
715 CASE_VI_GFX9(TTMP9) \
716 CASE_VI_GFX9(TTMP10) \
717 CASE_VI_GFX9(TTMP11) \
718 CASE_VI_GFX9(TTMP12) \
719 CASE_VI_GFX9(TTMP13) \
720 CASE_VI_GFX9(TTMP14) \
721 CASE_VI_GFX9(TTMP15) \
722 CASE_VI_GFX9(TTMP0_TTMP1) \
723 CASE_VI_GFX9(TTMP2_TTMP3) \
724 CASE_VI_GFX9(TTMP4_TTMP5) \
725 CASE_VI_GFX9(TTMP6_TTMP7) \
726 CASE_VI_GFX9(TTMP8_TTMP9) \
727 CASE_VI_GFX9(TTMP10_TTMP11) \
728 CASE_VI_GFX9(TTMP12_TTMP13) \
729 CASE_VI_GFX9(TTMP14_TTMP15) \
730 CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
731 CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
732 CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
733 CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
734 CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
735 CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
736 CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
737 CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
727 CASE_VI_GFX9_GFX10(TTMP0) \
728 CASE_VI_GFX9_GFX10(TTMP1) \
729 CASE_VI_GFX9_GFX10(TTMP2) \
730 CASE_VI_GFX9_GFX10(TTMP3) \
731 CASE_VI_GFX9_GFX10(TTMP4) \
732 CASE_VI_GFX9_GFX10(TTMP5) \
733 CASE_VI_GFX9_GFX10(TTMP6) \
734 CASE_VI_GFX9_GFX10(TTMP7) \
735 CASE_VI_GFX9_GFX10(TTMP8) \
736 CASE_VI_GFX9_GFX10(TTMP9) \
737 CASE_VI_GFX9_GFX10(TTMP10) \
738 CASE_VI_GFX9_GFX10(TTMP11) \
739 CASE_VI_GFX9_GFX10(TTMP12) \
740 CASE_VI_GFX9_GFX10(TTMP13) \
741 CASE_VI_GFX9_GFX10(TTMP14) \
742 CASE_VI_GFX9_GFX10(TTMP15) \
743 CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
744 CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
745 CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
746 CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
747 CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
748 CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
749 CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
750 CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
751 CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
752 CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
753 CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
754 CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
755 CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
756 CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
757 CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
758 CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
738759 }
739760
740761 #define CASE_CI_VI(node) \
741762 assert(!isSI(STI)); \
742763 case node: return isCI(STI) ? node##_ci : node##_vi;
743764
744 #define CASE_VI_GFX9(node) \
745 case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
765 #define CASE_VI_GFX9_GFX10(node) \
766 case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
746767
747768 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
748769 if (STI.getTargetTriple().getArch() == Triple::r600)
751772 }
752773
753774 #undef CASE_CI_VI
754 #undef CASE_VI_GFX9
775 #undef CASE_VI_GFX9_GFX10
755776
756777 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
757 #define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
778 #define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
758779
759780 unsigned mc2PseudoReg(unsigned Reg) {
760781 MAP_REG2REG
761782 }
762783
763784 #undef CASE_CI_VI
764 #undef CASE_VI_GFX9
785 #undef CASE_VI_GFX9_GFX10
765786 #undef MAP_REG2REG
766787
767788 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
10291050 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
10301051 return lookupSourceOfDivergence(IntrID);
10311052 }
1053
10321054 } // namespace AMDGPU
10331055 } // namespace llvm
243243 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
244244 const MCSubtargetInfo *STI);
245245
246 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
246 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
247 const MCSubtargetInfo *STI);
247248
248249 bool isGroupSegment(const GlobalValue *GV);
249250 bool isGlobalSegment(const GlobalValue *GV);
397398 bool isCI(const MCSubtargetInfo &STI);
398399 bool isVI(const MCSubtargetInfo &STI);
399400 bool isGFX9(const MCSubtargetInfo &STI);
401 bool isGFX10(const MCSubtargetInfo &STI);
400402
401403 /// Is Reg - scalar register
402404 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
8181 COMPPGM1(enable_dx10_clamp, compute_pgm_rsrc1_dx10_clamp, DX10_CLAMP),
8282 COMPPGM1(debug_mode, compute_pgm_rsrc1_debug_mode, DEBUG_MODE),
8383 COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE_MODE),
84 COMPPGM1(enable_wgp_mode, compute_pgm_rsrc1_wgp_mode, WGP_MODE),
85 COMPPGM1(enable_mem_ordered, compute_pgm_rsrc1_mem_ordered, MEM_ORDERED),
86 COMPPGM1(enable_fwd_progress, compute_pgm_rsrc1_fwd_progress, FWD_PROGRESS),
8487 // TODO: bulky
8588 // TODO: cdbg_user
8689 COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
4646 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx904 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX904 %s
4747 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX906 %s
4848 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s
49 ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
4950
5051 ; ARCH-R600: Arch: r600
5152 ; ARCH-GCN: Arch: amdgcn
8687 ; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E)
8788 ; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
8889 ; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
90 ; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
8991 ; ALL: ]
9092
9193 define amdgpu_kernel void @elf_header() {
2323 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX904 %s
2424 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s
2525 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s
26 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s
2627
2728 ; HSA: .hsa_code_object_version 2,1
2829 ; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
4142 ; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU"
4243 ; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
4344 ; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU"
45 ; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
12741274 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
12751275 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
12761276 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
1277 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
12771278 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
12781279 LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
12791280 };