llvm.org GIT mirror llvm / c3fd6a1
[AMDGPU] Workaround (w/a) for gfx908 MFMA SrcC literal HW bug. gfx908 ignores an MFMA instruction if SrcC is a literal. Differential Revision: https://reviews.llvm.org/D66670 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369816 91177308-0d34-0410-b5e6-96231b3b80d8 (Stanislav Mekhanoshin, 28 days ago)
8 changed file(s) with 48 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
151151 "LDSMisalignedBug",
152152 "true",
153153 "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
154 >;
155
// Subtarget feature selected by the "mfma-inline-literal-bug" attribute string.
// Its arguments name the HasMFMAInlineLiteralBug subtarget field and the value
// "true"; the last string is the human-readable description.
// Per the commit message this models a gfx908 hardware bug: the chip ignores
// an mfma whose SrcC operand is a literal.
156 def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug",
157 "HasMFMAInlineLiteralBug",
158 "true",
159 "MFMA cannot use inline literal as SrcC"
154160 >;
155161
156162 def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
810816 FeaturePkFmacF16Inst,
811817 FeatureAtomicFaddInsts,
812818 FeatureSRAMECC,
819 FeatureMFMAInlineLiteralBug,
813820 FeatureCodeObjectV3]>;
814821
815822 def FeatureISAVersion9_0_9 : FeatureSet<
261261 AddNoCarryInsts(false),
262262 HasUnpackedD16VMem(false),
263263 LDSMisalignedBug(false),
264 HasMFMAInlineLiteralBug(false),
264265
265266 ScalarizeGlobal(false),
266267
367367 bool CaymanISA;
368368 bool CFALUBug;
369369 bool LDSMisalignedBug;
370 bool HasMFMAInlineLiteralBug;
370371 bool HasVertexCache;
371372 short TexVTXClauseSize;
372373 bool ScalarizeGlobal;
984985
/// \returns The value of the SGPRInitBug subtarget flag.
985986 bool hasSGPRInitBug() const {
986987 return SGPRInitBug;
988 }
989
/// \returns The value of the HasMFMAInlineLiteralBug subtarget flag, i.e.
/// whether the "mfma-inline-literal-bug" feature ("MFMA cannot use inline
/// literal as SrcC") is enabled for this subtarget.
990 bool hasMFMAInlineLiteralBug() const {
991 return HasMFMAInlineLiteralBug;
987992 }
988993
989994 bool has12DWordStoreHazard() const {
434434 OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST)
435435 return false;
436436
437 if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy)) {
437 if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
438 TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
438439 UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
439440 return true;
440441 }
479480 if (Imm != SubImm)
480481 return false; // Can only fold splat constants
481482 }
483
484 if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
485 return false;
482486
483487 FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
484488 return true;
6060
6161 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
6262 AMDGPURegisterInfo(),
63 ST(ST),
6364 SGPRPressureSets(getNumRegPressureSets()),
6465 VGPRPressureSets(getNumRegPressureSets()),
6566 AGPRPressureSets(getNumRegPressureSets()),
15811582 }
15821583 }
15831584
/// \returns True if an operand of type \p OpType may hold an inline constant.
///
/// Operand types in the range
/// [OPERAND_REG_INLINE_AC_FIRST, OPERAND_REG_INLINE_AC_LAST] are accepted only
/// when the subtarget does not report hasMFMAInlineLiteralBug(). Every other
/// type is accepted exactly when it lies in
/// [OPERAND_SRC_FIRST, OPERAND_SRC_LAST].
1585 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
// The INLINE_AC range is decided purely by the subtarget bug flag; it never
// falls through to the generic OPERAND_SRC range check below.
// NOTE(review): presumably these are the operand types used for MFMA SrcC,
// matching the commit message's gfx908 workaround -- confirm against SIDefines.
1586 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
1587 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
1588 return !ST.hasMFMAInlineLiteralBug();
1589
1590 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1591 OpType <= AMDGPU::OPERAND_SRC_LAST;
1592 }
1593
15841594 bool SIRegisterInfo::shouldRewriteCopySrc(
15851595 const TargetRegisterClass *DefRC,
15861596 unsigned DefSubReg,
2626
2727 class SIRegisterInfo final : public AMDGPURegisterInfo {
2828 private:
29 const GCNSubtarget &ST;
2930 unsigned SGPRSetID;
3031 unsigned VGPRSetID;
3132 unsigned AGPRSetID;
192193 /// \returns True if operands defined with this operand type can accept
193194 /// an inline constant. i.e. An integer value in the range (-16, 64) or
194195 /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
195 bool opCanUseInlineConstant(unsigned OpType) const {
196 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
197 OpType <= AMDGPU::OPERAND_SRC_LAST;
198 }
196 bool opCanUseInlineConstant(unsigned OpType) const;
199197
200198 unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
201199 const TargetRegisterClass *RC,
22 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
33
44 ; GCN-LABEL: {{^}}test_32_agprs:
5 ; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}}, 0
5 ; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}},
66 ; GCN-NOT: v28
77 ; GCN: NumVgprs: 32
88 ; GCN: VGPRBlocks: 7
None ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
0 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,NOLIT-SRCC %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,LIT-SRCC %s
12
23 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
34 declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
992993 ; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_imm_splat:
993994 ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
994995 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
995 ; GCN: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0
996 ; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
997 ; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
998 ; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
999 ; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
1000 ; NOLIT-SRCC: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9:]+}}]
1001 ; LIT-SRCC: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0
9961002 ; GCN: v_accvgpr_read_b32
9971003 ; GCN: v_accvgpr_read_b32
9981004 ; GCN: v_accvgpr_read_b32
10081014 ; GCN-LABEL: {{^}}test_mfma_f32_16x16x1f32_imm_splat:
10091015 ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
10101016 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
1011 ; GCN: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0
1017 ; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
1018 ; NOLIT-SRCC: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9:]+}}]
1019 ; LIT-SRCC: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0
10121020 ; GCN-DAG: v_accvgpr_read_b32
10131021 ; GCN-DAG: v_accvgpr_read_b32
10141022 ; GCN-DAG: v_accvgpr_read_b32
10391047 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x8f16_imm_splat:
10401048 ; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 0x40004000
10411049 ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 0x3c003c00
1042 ; GCN: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], 1.0
1050 ; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
1051 ; NOLIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9:]+}}]
1052 ; LIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], 1.0
10431053 ; GCN-DAG: v_accvgpr_read_b32
10441054 ; GCN-DAG: v_accvgpr_read_b32
10451055 ; GCN-DAG: v_accvgpr_read_b32
10701080 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_imm_splat:
10711081 ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
10721082 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
1073 ; GCN: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 0
1083 ; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0
1084 ; NOLIT-SRCC: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9:]+}}]
1085 ; LIT-SRCC: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 0
10741086 ; GCN-DAG: v_accvgpr_read_b32
10751087 ; GCN-DAG: v_accvgpr_read_b32
10761088 ; GCN-DAG: v_accvgpr_read_b32