llvm.org GIT mirror llvm / 9638ce5
Merging r238147: ------------------------------------------------------------------------ r238147 | thomas.stellard | 2015-05-25 12:15:54 -0400 (Mon, 25 May 2015) | 4 lines R600/SI: Fix bug with v_interp_p1_f32 instructions on 16 bank lds chips The src and dst register cannot be the same on chips with 16 lds banks. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@240285 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 4 years ago
6 changed file(s) with 102 addition(s) and 20 deletion(s). Raw diff Collapse all Expand all
120120 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
121121 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
122122
123 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
124 "ldsbankcount"#Value,
125 "LDSBankCount",
126 !cast<string>(Value),
127 "The number of LDS banks per compute unit.">;
128
129 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
130 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
131
123132 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
124133 "localmemorysize"#Value,
125134 "LocalMemorySize",
151160
152161 def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
153162 [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
154 FeatureWavefrontSize64]>;
163 FeatureWavefrontSize64, FeatureLDSBankCount32]>;
155164
156165 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
157166 [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
159168
160169 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
161170 [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
162 FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
171 FeatureWavefrontSize64, FeatureFlatAddressSpace,
172 FeatureLDSBankCount32]>;
163173
164174 //===----------------------------------------------------------------------===//
165175
8080 EnablePromoteAlloca(false), EnableIfCvt(true),
8181 EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
8282 EnableVGPRSpilling(false),SGPRInitBug(false),
83 LDSBankCount(0),
8384 DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
8485 FrameLowering(TargetFrameLowering::StackGrowsUp,
8586 64 * 16, // Maximum stack alignment (long16)
7070 int LocalMemorySize;
7171 bool EnableVGPRSpilling;
7272 bool SGPRInitBug;
73 int LDSBankCount;
7374
7475 const DataLayout DL;
7576 AMDGPUFrameLowering FrameLowering;
211212 return SGPRInitBug;
212213 }
213214
215 int getLDSBankCount() const {
216 return LDSBankCount;
217 }
218
214219 unsigned getAmdKernelCodeChipID() const;
215220
216221 bool enableMachineScheduler() const override {
9898 // Sea Islands
9999 //===----------------------------------------------------------------------===//
100100
101 def : ProcessorModel<"bonaire", SIQuarterSpeedModel, [FeatureSeaIslands]>;
101 def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
102 [FeatureSeaIslands, FeatureLDSBankCount32]
103 >;
102104
103 def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>;
105 def : ProcessorModel<"kabini", SIQuarterSpeedModel,
106 [FeatureSeaIslands, FeatureLDSBankCount16]
107 >;
104108
105 def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>;
109 def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
110 [FeatureSeaIslands, FeatureLDSBankCount32]
111 >;
106112
107 def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>;
113 def : ProcessorModel<"hawaii", SIFullSpeedModel,
114 [FeatureSeaIslands, FeatureLDSBankCount32]
115 >;
108116
109 def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
117 def : ProcessorModel<"mullins", SIQuarterSpeedModel,
118 [FeatureSeaIslands, FeatureLDSBankCount16]>;
110119
111120 //===----------------------------------------------------------------------===//
112121 // Volcanic Islands
3838 >;
3939
4040 def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
41
42 def has16BankLDS : Predicate<"Subtarget.getLDSBankCount() == 16">;
43 def has32BankLDS : Predicate<"Subtarget.getLDSBankCount() == 32">;
4144
4245 def SWaitMatchClass : AsmOperandClass {
4346 let Name = "SWaitCnt";
13751378 //===----------------------------------------------------------------------===//
13761379
13771380 // FIXME: Specify SchedRW for VINTRP instructions.
1378 defm V_INTERP_P1_F32 : VINTRP_m <
1379 0x00000000,
1381
1382 multiclass V_INTERP_P1_F32_m : VINTRP_m <
1383 0x00000000,
13801384 (outs VGPR_32:$dst),
13811385 (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
13821386 "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
1383 "$m0">;
1387 "$m0"
1388 >;
1389
1390 let OtherPredicates = [has32BankLDS] in {
1391
1392 defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
1393
1394 } // End OtherPredicates = [has32BankLDS]
1395
1396 let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst" in {
1397
1398 defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
1399
1400 } // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst"
13841401
13851402 defm V_INTERP_P2_F32 : VINTRP_m <
13861403 0x00000001,
26712688 (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
26722689 >;
26732690
2674 def : Pat <
2691 class FSInterpPat<Instruction P1> : Pat <
26752692 (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
2676 (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
2693 (V_INTERP_P2_F32 (P1 (EXTRACT_SUBREG v2i32:$ij, sub0),
26772694 imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
26782695 (EXTRACT_SUBREG $ij, sub1),
26792696 imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
26802697 >;
2698
2699 let Predicates = [has32BankLDS] in {
2700
2701 def : FSInterpPat<V_INTERP_P1_F32>;
2702
2703 } // End Predicates = [has32BankLDS]
2704
2705 let Predicates = [has16BankLDS] in {
2706
2707 def : FSInterpPat<V_INTERP_P1_F32_16bank>;
2708
2709 } // End Predicates = [has16BankLDS]
26812710
26822711 /********** ================== **********/
26832712 /********** Intrinsic Patterns **********/
None ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
0 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=kabini -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=16BANK %s
2 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
23
3 ;CHECK-NOT: s_wqm
4 ;CHECK: s_mov_b32
5 ;CHECK: v_interp_p1_f32
6 ;CHECK: v_interp_p2_f32
7 ;CHECK: v_interp_mov_f32
4 ;GCN-NOT: s_wqm
5 ;GCN: s_mov_b32
6 ;GCN: v_interp_p1_f32
7 ;GCN: v_interp_p2_f32
8 ;GCN: v_interp_mov_f32
89
910 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
1011 main_body:
1516 ret void
1617 }
1718
18 declare void @llvm.AMDGPU.shader.type(i32)
19 ; Test that v_interp_p1 uses different source and destination registers
20 ; on 16 bank LDS chips.
21
22 ; 16BANK-LABEL: {{^}}v_interp_p1_bank16_bug:
23 ; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
24
25 define void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
26 main_body:
27 %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
28 %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
29 %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
30 %25 = call float @fabs(float %22)
31 %26 = call float @fabs(float %23)
32 %27 = call float @fabs(float %24)
33 %28 = call i32 @llvm.SI.packf16(float %25, float %26)
34 %29 = bitcast i32 %28 to float
35 %30 = call i32 @llvm.SI.packf16(float %27, float 1.000000e+00)
36 %31 = bitcast i32 %30 to float
37 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31)
38 ret void
39 }
40
41 ; Function Attrs: readnone
42 declare float @fabs(float) #2
43
44 ; Function Attrs: nounwind readnone
45 declare i32 @llvm.SI.packf16(float, float) #1
1946
2047 ; Function Attrs: nounwind readnone
2148 declare float @llvm.SI.fs.constant(i32, i32, i32) #1
2754
2855 attributes #0 = { "ShaderType"="0" }
2956 attributes #1 = { nounwind readnone }
57 attributes #2 = { readnone }