llvm.org GIT mirror llvm / 944600b
[AMDGPU] Optimize image_[load|store]_mip Summary: Replace image_load_mip/image_store_mip with image_load/image_store if lod is 0. Reviewers: arsenm, nhaehnle Reviewed By: arsenm Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63073 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362957 91177308-0d34-0410-b5e6-96231b3b80d8 Piotr Sobczak 4 months ago
5 changed file(s) with 175 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
9898
9999 let PrimaryKey = ["L"];
100100 let PrimaryKeyName = "getMIMGLZMappingInfo";
101 }
102
// Pairs a MIMG base opcode that takes a mip-level ('lod') operand with the
// base opcode to use when that operand can be dropped. Records derived from
// this class are picked up by any GenericTable whose FilterClass is
// "MIMGMIPMapping".
//   mip    - base opcode of the _mip instruction.
//   nonmip - base opcode of the variant without _mip.
103 class MIMGMIPMapping<MIMGBaseOpcode mip, MIMGBaseOpcode nonmip> {
104 MIMGBaseOpcode MIP = mip;
105 MIMGBaseOpcode NONMIP = nonmip;
106 }
107
// Searchable table collecting every record derived from MIMGMIPMapping.
// Rows use the C++ type MIMGMIPMappingInfo and are looked up by their MIP
// column through getMIMGMIPMappingInfo().
108 def MIMGMIPMappingTable : GenericTable {
109 let FilterClass = "MIMGMIPMapping";
110 let CppTypeName = "MIMGMIPMappingInfo";
111 let Fields = ["MIP", "NONMIP"];
// Both columns hold MIMGBaseOpcode enumerators.
112 GenericEnum TypeOf_MIP = MIMGBaseOpcode;
113 GenericEnum TypeOf_NONMIP = MIMGBaseOpcode;
114
// MIP is the search key; PrimaryKeyName names the lookup function.
115 let PrimaryKey = ["MIP"];
116 let PrimaryKeyName = "getMIMGMIPMappingInfo";
101117 }
102118
103119 class MIMG
807823 def : MIMGLZMapping;
808824 def : MIMGLZMapping;
809825 def : MIMGLZMapping;
826
827 // MIP to NONMIP Optimization Mapping
// Each record pairs a _mip base opcode with the base opcode that replaces it
// when the 'lod' operand is a constant zero.
// NOTE(review): the opcode names below are reconstructed from the commit
// summary (image_load_mip -> image_load, image_store_mip -> image_store);
// confirm them against the MIMG base opcode definitions.
828 def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
829 def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
48624862 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
48634863 const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
48644864 AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
4865 const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
4866 AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
48654867 unsigned IntrOpcode = Intr->BaseOpcode;
48664868 bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
48674869
49604962 dyn_cast(Op.getOperand(AddrIdx+NumVAddrs-1))) {
49614963 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
49624964 IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
4965 NumMIVAddrs--; // remove 'lod'
4966 }
4967 }
4968 }
4969
4970 // Optimize _mip away, when 'lod' is zero
4971 if (MIPMappingInfo) {
4972 if (auto ConstantLod =
4973 dyn_cast(Op.getOperand(AddrIdx+NumVAddrs-1))) {
4974 if (ConstantLod->isNullValue()) {
4975 IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
49634976 NumMIVAddrs--; // remove 'lod'
49644977 }
49654978 }
103103 #define GET_MIMGDimInfoTable_IMPL
104104 #define GET_MIMGInfoTable_IMPL
105105 #define GET_MIMGLZMappingTable_IMPL
106 #define GET_MIMGMIPMappingTable_IMPL
106107 #include "AMDGPUGenSearchableTables.inc"
107108
108109 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
4444 #define GET_MIMGDim_DECL
4545 #define GET_MIMGEncoding_DECL
4646 #define GET_MIMGLZMapping_DECL
47 #define GET_MIMGMIPMapping_DECL
4748 #include "AMDGPUGenSearchableTables.inc"
4849
4950 namespace IsaInfo {
217218 MIMGBaseOpcode LZ;
218219 };
219220
/// One row of the MIP -> NONMIP mapping table.
221 struct MIMGMIPMappingInfo {
/// Base opcode of the _mip image instruction; this is the lookup key.
222 MIMGBaseOpcode MIP;
/// Base opcode of the variant without _mip, used when 'lod' is zero.
223 MIMGBaseOpcode NONMIP;
224 };
225
220226 LLVM_READONLY
221227 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
228
/// Looks up the MIP -> NONMIP mapping row whose MIP column equals \p MIP.
/// NOTE(review): presumably returns nullptr when no row matches, since the
/// visible caller tests the result before dereferencing it; confirm against
/// the generated lookup.
229 LLVM_READONLY
230 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
222231
223232 LLVM_READONLY
224233 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
0 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
2
3
4 ; GCN-LABEL: {{^}}load_mip_1d:
5 ; GCN-NOT: image_load_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_load_mip cannot satisfy it.
6 ; GCN: image_load v
7 define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s) {
8 main_body:
9 %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
10 ret <4 x float> %v
11 }
12
13 ; GCN-LABEL: {{^}}load_mip_2d:
14 ; GCN-NOT: image_load_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_load_mip cannot satisfy it.
15 ; GCN: image_load v
16 define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
17 main_body:
18 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
19 ret <4 x float> %v
20 }
21
22 ; GCN-LABEL: {{^}}load_mip_3d:
23 ; GCN-NOT: image_load_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_load_mip cannot satisfy it.
24 ; GCN: image_load v
25 define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
26 main_body:
27 %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
28 ret <4 x float> %v
29 }
30
31 ; GCN-LABEL: {{^}}load_mip_1darray:
32 ; GCN-NOT: image_load_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_load_mip cannot satisfy it.
33 ; GCN: image_load v
34 define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
35 main_body:
36 %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
37 ret <4 x float> %v
38 }
39
40 ; GCN-LABEL: {{^}}load_mip_2darray:
41 ; GCN-NOT: image_load_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_load_mip cannot satisfy it.
42 ; GCN: image_load v
43 define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
44 main_body:
45 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
46 ret <4 x float> %v
47 }
48
49 ; GCN-LABEL: {{^}}load_mip_cube:
50 ; GCN-NOT: image_load_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_load_mip cannot satisfy it.
51 ; GCN: image_load v
52 define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
53 main_body:
54 %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
55 ret <4 x float> %v
56 }
57
58
59
60 ; GCN-LABEL: {{^}}store_mip_1d:
61 ; GCN-NOT: image_store_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_store_mip cannot satisfy it.
62 ; GCN: image_store v
63 define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
64 main_body:
65 call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
66 ret void
67 }
68
69 ; GCN-LABEL: {{^}}store_mip_2d:
70 ; GCN-NOT: image_store_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_store_mip cannot satisfy it.
71 ; GCN: image_store v
72 define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
73 main_body:
74 call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
75 ret void
76 }
77
78 ; GCN-LABEL: {{^}}store_mip_3d:
79 ; GCN-NOT: image_store_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_store_mip cannot satisfy it.
80 ; GCN: image_store v
81 define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
82 main_body:
83 call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
84 ret void
85 }
86
87 ; GCN-LABEL: {{^}}store_mip_1darray:
88 ; GCN-NOT: image_store_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_store_mip cannot satisfy it.
89 ; GCN: image_store v
90 define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
91 main_body:
92 call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
93 ret void
94 }
95
96 ; GCN-LABEL: {{^}}store_mip_2darray:
97 ; GCN-NOT: image_store_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_store_mip cannot satisfy it.
98 ; GCN: image_store v
99 define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
100 main_body:
101 call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
102 ret void
103 }
104
105 ; GCN-LABEL: {{^}}store_mip_cube:
106 ; GCN-NOT: image_store_mip
; The positive pattern continues past the opcode (space plus the start of the
; first register operand) so that image_store_mip cannot satisfy it.
107 ; GCN: image_store v
108 define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
109 main_body:
110 call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
111 ret void
112 }
113
114 declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
115 declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
116 declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
117 declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
118 declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
119 declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
120
121
122 declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
123 declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
124 declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
125 declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
126 declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
127 declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
128
129 attributes #0 = { nounwind }
130 attributes #1 = { nounwind readonly }
131