llvm.org GIT mirror llvm / dad6e61
[AMDGPU] Add intrinsics for tbuffer load and store Intrinsic already existed for llvm.SI.tbuffer.store Needed tbuffer.load and also re-implementing the intrinsic as llvm.amdgcn.tbuffer.* Added CodeGen tests for the 2 new variants added. Left the original llvm.SI.tbuffer.store implementation to avoid issues with existing code Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, tpr Differential Revision: https://reviews.llvm.org/D30687 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306031 91177308-0d34-0410-b5e6-96231b3b80d8 David Stuttard 2 years ago
19 changed file(s) with 915 addition(s) and 166 deletion(s). Raw diff Collapse all Expand all
473473 [IntrWriteMem]>;
474474 def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
475475 def int_amdgcn_buffer_store : AMDGPUBufferStore;
476
477 def int_amdgcn_tbuffer_load : Intrinsic <
478 [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
479 [llvm_v4i32_ty, // rsrc(SGPR)
480 llvm_i32_ty, // vindex(VGPR)
481 llvm_i32_ty, // voffset(VGPR)
482 llvm_i32_ty, // soffset(SGPR)
483 llvm_i32_ty, // offset(imm)
484 llvm_i32_ty, // dfmt(imm)
485 llvm_i32_ty, // nfmt(imm)
486 llvm_i1_ty, // glc(imm)
487 llvm_i1_ty], // slc(imm)
488 []>;
489
490 def int_amdgcn_tbuffer_store : Intrinsic <
491 [],
492 [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
493 llvm_v4i32_ty, // rsrc(SGPR)
494 llvm_i32_ty, // vindex(VGPR)
495 llvm_i32_ty, // voffset(VGPR)
496 llvm_i32_ty, // soffset(SGPR)
497 llvm_i32_ty, // offset(imm)
498 llvm_i32_ty, // dfmt(imm)
499 llvm_i32_ty, // nfmt(imm)
500 llvm_i1_ty, // glc(imm)
501 llvm_i1_ty], // slc(imm)
502 []>;
476503
477504 class AMDGPUBufferAtomic : Intrinsic <
478505 [llvm_i32_ty],
36633663 NODE_NAME_CASE(STORE_MSKOR)
36643664 NODE_NAME_CASE(LOAD_CONSTANT)
36653665 NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
3666 NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3)
3667 NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
36663668 NODE_NAME_CASE(ATOMIC_CMP_SWAP)
36673669 NODE_NAME_CASE(ATOMIC_INC)
36683670 NODE_NAME_CASE(ATOMIC_DEC)
402402 STORE_MSKOR,
403403 LOAD_CONSTANT,
404404 TBUFFER_STORE_FORMAT,
405 TBUFFER_STORE_FORMAT_X3,
406 TBUFFER_LOAD_FORMAT,
405407 ATOMIC_CMP_SWAP,
406408 ATOMIC_INC,
407409 ATOMIC_DEC,
151151 ImmTyExpTgt,
152152 ImmTyExpCompr,
153153 ImmTyExpVM,
154 ImmTyDFMT,
155 ImmTyNFMT,
154156 ImmTyHwreg,
155157 ImmTyOff,
156158 ImmTySendMsg,
293295 bool isGLC() const { return isImmTy(ImmTyGLC); }
294296 bool isSLC() const { return isImmTy(ImmTySLC); }
295297 bool isTFE() const { return isImmTy(ImmTyTFE); }
298 bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
299 bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
296300 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
297301 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
298302 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
637641 case ImmTyGLC: OS << "GLC"; break;
638642 case ImmTySLC: OS << "SLC"; break;
639643 case ImmTyTFE: OS << "TFE"; break;
644 case ImmTyDFMT: OS << "DFMT"; break;
645 case ImmTyNFMT: OS << "NFMT"; break;
640646 case ImmTyClampSI: OS << "ClampSI"; break;
641647 case ImmTyOModSI: OS << "OModSI"; break;
642648 case ImmTyDppCtrl: OS << "DppCtrl"; break;
10321038 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
10331039 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
10341040 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1041 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1042
10351043 AMDGPUOperand::Ptr defaultGLC() const;
10361044 AMDGPUOperand::Ptr defaultSLC() const;
10371045 AMDGPUOperand::Ptr defaultTFE() const;
38193827 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
38203828 }
38213829
3830 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
3831 OptionalImmIndexMap OptionalIdx;
3832
3833 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3834 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3835
3836 // Add the register arguments
3837 if (Op.isReg()) {
3838 Op.addRegOperands(Inst, 1);
3839 continue;
3840 }
3841
3842 // Handle the case where soffset is an immediate
3843 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3844 Op.addImmOperands(Inst, 1);
3845 continue;
3846 }
3847
3848 // Handle tokens like 'offen' which are sometimes hard-coded into the
3849 // asm string. There are no MCInst operands for these.
3850 if (Op.isToken()) {
3851 continue;
3852 }
3853 assert(Op.isImm());
3854
3855 // Handle optional arguments
3856 OptionalIdx[Op.getImmTy()] = i;
3857 }
3858
3859 addOptionalImmOperand(Inst, Operands, OptionalIdx,
3860 AMDGPUOperand::ImmTyOffset);
3861 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
3862 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
3863 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3864 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3865 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3866 }
3867
38223868 //===----------------------------------------------------------------------===//
38233869 // mimg
38243870 //===----------------------------------------------------------------------===//
39994045 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
40004046 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
40014047 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
4048 {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
4049 {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
40024050 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
40034051 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
40044052 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
5656 string OpName = NAME # suffix;
5757 }
5858
59 class MTBUFAddr64Table {
60 bit IsAddr64 = is_addr64;
61 string OpName = NAME # suffix;
62 }
63
5964 //===----------------------------------------------------------------------===//
6065 // MTBUF classes
6166 //===----------------------------------------------------------------------===//
7782 let EXP_CNT = 1;
7883 let MTBUF = 1;
7984 let Uses = [EXEC];
80
8185 let hasSideEffects = 0;
8286 let SchedRW = [WriteVMEM];
87
88 let AsmMatchConverter = "cvtMtbuf";
89
90 bits<1> offen = 0;
91 bits<1> idxen = 0;
92 bits<1> addr64 = 0;
93 bits<1> has_vdata = 1;
94 bits<1> has_vaddr = 1;
95 bits<1> has_glc = 1;
96 bits<1> glc_value = 0; // the value for glc if no such operand
97 bits<4> dfmt_value = 1; // the value for dfmt if no such operand
98 bits<3> nfmt_value = 0; // the value for nfmt if no such operand
99 bits<1> has_srsrc = 1;
100 bits<1> has_soffset = 1;
101 bits<1> has_offset = 1;
102 bits<1> has_slc = 1;
103 bits<1> has_tfe = 1;
104 bits<1> has_dfmt = 1;
105 bits<1> has_nfmt = 1;
83106 }
84107
85108 class MTBUF_Real :
86 InstSI ,
87 Enc64 {
109 InstSI {
88110
89111 let isPseudo = 0;
90112 let isCodeGenOnly = 0;
96118 let DisableEncoding = ps.DisableEncoding;
97119 let TSFlags = ps.TSFlags;
98120
99 bits<8> vdata;
100121 bits<12> offset;
101 bits<1> offen;
102 bits<1> idxen;
103 bits<1> glc;
104 bits<1> addr64;
105 bits<4> dfmt;
106 bits<3> nfmt;
107 bits<8> vaddr;
108 bits<7> srsrc;
109 bits<1> slc;
110 bits<1> tfe;
111 bits<8> soffset;
112
113 let Inst{11-0} = offset;
114 let Inst{12} = offen;
115 let Inst{13} = idxen;
116 let Inst{14} = glc;
117 let Inst{22-19} = dfmt;
118 let Inst{25-23} = nfmt;
119 let Inst{31-26} = 0x3a; //encoding
120 let Inst{39-32} = vaddr;
121 let Inst{47-40} = vdata;
122 let Inst{52-48} = srsrc{6-2};
123 let Inst{54} = slc;
124 let Inst{55} = tfe;
125 let Inst{63-56} = soffset;
126 }
127
128 class MTBUF_Load_Pseudo : MTBUF_Pseudo <
129 opName, (outs regClass:$dst),
130 (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
131 i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
132 i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
133 " $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
134 " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
122 bits<1> glc;
123 bits<4> dfmt;
124 bits<3> nfmt;
125 bits<8> vaddr;
126 bits<8> vdata;
127 bits<7> srsrc;
128 bits<1> slc;
129 bits<1> tfe;
130 bits<8> soffset;
131 }
132
133 class getMTBUFInsDA vdataList,
134 list vaddrList=[]> {
135 RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
136 RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
137 dag InsNoData = !if(!empty(vaddrList),
138 (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
139 offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe),
140 (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
141 offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe)
142 );
143 dag InsData = !if(!empty(vaddrList),
144 (ins vdataClass:$vdata, SReg_128:$srsrc,
145 SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
146 slc:$slc, tfe:$tfe),
147 (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
148 SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
149 slc:$slc, tfe:$tfe)
150 );
151 dag ret = !if(!empty(vdataList), InsNoData, InsData);
152 }
153
154 class getMTBUFIns vdataList=[]> {
155 dag ret =
156 !if(!eq(addrKind, BUFAddrKind.Offset), getMTBUFInsDA.ret,
157 !if(!eq(addrKind, BUFAddrKind.OffEn), getMTBUFInsDA.ret,
158 !if(!eq(addrKind, BUFAddrKind.IdxEn), getMTBUFInsDA.ret,
159 !if(!eq(addrKind, BUFAddrKind.BothEn), getMTBUFInsDA.ret,
160 !if(!eq(addrKind, BUFAddrKind.Addr64), getMTBUFInsDA.ret,
161 (ins))))));
162 }
163
164 class getMTBUFAsmOps {
165 string Pfx =
166 !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset",
167 !if(!eq(addrKind, BUFAddrKind.OffEn),
168 "$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen",
169 !if(!eq(addrKind, BUFAddrKind.IdxEn),
170 "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen",
171 !if(!eq(addrKind, BUFAddrKind.BothEn),
172 "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen",
173 !if(!eq(addrKind, BUFAddrKind.Addr64),
174 "$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64",
175 "")))));
176 string ret = Pfx # "$offset";
177 }
178
179 class MTBUF_SetupAddr {
180 bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1,
181 !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
182
183 bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1,
184 !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
185
186 bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0);
187
188 bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1);
189 }
190
191 class MTBUF_Load_Pseudo
192 int addrKind,
193 RegisterClass vdataClass,
194 list pattern=[],
195 // Workaround bug bz30254
196 int addrKindCopy = addrKind>
197 : MTBUF_Pseudo
198 (outs vdataClass:$vdata),
199 getMTBUFIns.ret,
200 " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe",
201 pattern>,
202 MTBUF_SetupAddr {
203 let PseudoInstr = opName # "_" # getAddrName.ret;
135204 let mayLoad = 1;
136205 let mayStore = 0;
137206 }
138207
139 class MTBUF_Store_Pseudo : MTBUF_Pseudo <
140 opName, (outs),
141 (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
142 i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
143 SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
144 " $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
145 " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
208 multiclass MTBUF_Pseudo_Loads
209 ValueType load_vt = i32,
210 SDPatternOperator ld = null_frag> {
211
212 def _OFFSET : MTBUF_Load_Pseudo
213 [(set load_vt:$vdata,
214 (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$dfmt,
215 i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
216 MTBUFAddr64Table<0>;
217
218 def _ADDR64 : MTBUF_Load_Pseudo
219 [(set load_vt:$vdata,
220 (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
221 i8:$dfmt, i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
222 MTBUFAddr64Table<1>;
223
224 def _OFFEN : MTBUF_Load_Pseudo ;
225 def _IDXEN : MTBUF_Load_Pseudo ;
226 def _BOTHEN : MTBUF_Load_Pseudo ;
227
228 let DisableWQM = 1 in {
229 def _OFFSET_exact : MTBUF_Load_Pseudo ;
230 def _OFFEN_exact : MTBUF_Load_Pseudo ;
231 def _IDXEN_exact : MTBUF_Load_Pseudo ;
232 def _BOTHEN_exact : MTBUF_Load_Pseudo ;
233 }
234 }
235
236 class MTBUF_Store_Pseudo
237 int addrKind,
238 RegisterClass vdataClass,
239 list pattern=[],
240 // Workaround bug bz30254
241 int addrKindCopy = addrKind,
242 RegisterClass vdataClassCopy = vdataClass>
243 : MTBUF_Pseudo
244 (outs),
245 getMTBUFIns.ret,
246 " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe",
247 pattern>,
248 MTBUF_SetupAddr {
249 let PseudoInstr = opName # "_" # getAddrName.ret;
146250 let mayLoad = 0;
147251 let mayStore = 1;
148252 }
253
254 multiclass MTBUF_Pseudo_Stores
255 ValueType store_vt = i32,
256 SDPatternOperator st = null_frag> {
257
258 def _OFFSET : MTBUF_Store_Pseudo
259 [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
260 i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
261 i1:$slc, i1:$tfe))]>,
262 MTBUFAddr64Table<0>;
263
264 def _ADDR64 : MTBUF_Store_Pseudo
265 [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
266 i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
267 i1:$slc, i1:$tfe))]>,
268 MTBUFAddr64Table<1>;
269
270 def _OFFEN : MTBUF_Store_Pseudo ;
271 def _IDXEN : MTBUF_Store_Pseudo ;
272 def _BOTHEN : MTBUF_Store_Pseudo ;
273
274 let DisableWQM = 1 in {
275 def _OFFSET_exact : MTBUF_Store_Pseudo ;
276 def _OFFEN_exact : MTBUF_Store_Pseudo ;
277 def _IDXEN_exact : MTBUF_Store_Pseudo ;
278 def _BOTHEN_exact : MTBUF_Store_Pseudo ;
279 }
280 }
281
149282
150283 //===----------------------------------------------------------------------===//
151284 // MUBUF classes
675808 // MTBUF Instructions
676809 //===----------------------------------------------------------------------===//
677810
678 //def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0, "tbuffer_load_format_x", []>;
679 //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <1, "tbuffer_load_format_xy", []>;
680 //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <2, "tbuffer_load_format_xyz", []>;
681 def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Pseudo <"tbuffer_load_format_xyzw", VReg_128>;
682 def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Pseudo <"tbuffer_store_format_x", VGPR_32>;
683 def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Pseudo <"tbuffer_store_format_xy", VReg_64>;
684 def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Pseudo <"tbuffer_store_format_xyz", VReg_128>;
685 def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Pseudo <"tbuffer_store_format_xyzw", VReg_128>;
811 defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>;
812 defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>;
813 defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_128>;
814 defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>;
815 defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>;
816 defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>;
817 defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
818 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
686819
687820 } // End let SubtargetPredicate = isGCN
688821
10921225 // MTBUF Patterns
10931226 //===----------------------------------------------------------------------===//
10941227
1095 // TBUFFER_STORE_FORMAT_*, addr64=0
1096 class MTBUF_StoreResource : Pat<
1097 (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
1098 i32:$soffset, imm:$inst_offset, imm:$dfmt,
1099 imm:$nfmt, imm:$offen, imm:$idxen,
1100 imm:$glc, imm:$slc, imm:$tfe),
1101 (opcode
1102 $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
1103 (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
1104 (as_i1imm $slc), (as_i1imm $tfe), $soffset)
1105 >;
1106
1107 def : MTBUF_StoreResource ;
1108 def : MTBUF_StoreResource ;
1109 def : MTBUF_StoreResource ;
1110 def : MTBUF_StoreResource ;
1228 //===----------------------------------------------------------------------===//
1229 // tbuffer_load/store_format patterns
1230 //===----------------------------------------------------------------------===//
1231
1232 multiclass MTBUF_LoadIntrinsicPat
1233 string opcode> {
1234 def : Pat<
1235 (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
1236 imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
1237 (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
1238 (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
1239 >;
1240
1241 def : Pat<
1242 (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
1243 imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
1244 (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
1245 (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
1246 >;
1247
1248 def : Pat<
1249 (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
1250 imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
1251 (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
1252 (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
1253 >;
1254
1255 def : Pat<
1256 (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
1257 imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
1258 (!cast(opcode # _BOTHEN)
1259 (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
1260 $rsrc, $soffset, (as_i16imm $offset),
1261 (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
1262 >;
1263 }
1264
1265 defm : MTBUF_LoadIntrinsicPat;
1266 defm : MTBUF_LoadIntrinsicPat;
1267 defm : MTBUF_LoadIntrinsicPat;
1268 defm : MTBUF_LoadIntrinsicPat;
1269 defm : MTBUF_LoadIntrinsicPat;
1270 defm : MTBUF_LoadIntrinsicPat;
1271
1272 multiclass MTBUF_StoreIntrinsicPat
1273 string opcode> {
1274 def : Pat<
1275 (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
1276 imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
1277 (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
1278 (as_i16imm $offset), (as_i8imm $dfmt),
1279 (as_i8imm $nfmt), (as_i1imm $glc),
1280 (as_i1imm $slc), 0)
1281 >;
1282
1283 def : Pat<
1284 (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
1285 imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
1286 (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
1287 (as_i16imm $offset), (as_i8imm $dfmt),
1288 (as_i8imm $nfmt), (as_i1imm $glc),
1289 (as_i1imm $slc), 0)
1290 >;
1291
1292 def : Pat<
1293 (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
1294 imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
1295 (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
1296 (as_i16imm $offset), (as_i8imm $dfmt),
1297 (as_i8imm $nfmt), (as_i1imm $glc),
1298 (as_i1imm $slc), 0)
1299 >;
1300
1301 def : Pat<
1302 (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
1303 imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
1304 (!cast(opcode # _BOTHEN_exact)
1305 $vdata,
1306 (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
1307 $rsrc, $soffset, (as_i16imm $offset),
1308 (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
1309 >;
1310 }
1311
1312 defm : MTBUF_StoreIntrinsicPat;
1313 defm : MTBUF_StoreIntrinsicPat;
1314 defm : MTBUF_StoreIntrinsicPat;
1315 defm : MTBUF_StoreIntrinsicPat;
1316 defm : MTBUF_StoreIntrinsicPat;
1317 defm : MTBUF_StoreIntrinsicPat;
1318 defm : MTBUF_StoreIntrinsicPat;
1319 defm : MTBUF_StoreIntrinsicPat;
11111320
11121321 } // End let Predicates = [isGCN]
11131322
12231432
12241433 class MTBUF_Real_si op, MTBUF_Pseudo ps> :
12251434 MTBUF_Real,
1435 Enc64,
12261436 SIMCInstr {
12271437 let AssemblerPredicate=isSICI;
12281438 let DecoderNamespace="SICI";
12291439
1230 bits<1> addr64;
1231 let Inst{15} = addr64;
1440 let Inst{11-0} = !if(ps.has_offset, offset, ?);
1441 let Inst{12} = ps.offen;
1442 let Inst{13} = ps.idxen;
1443 let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
1444 let Inst{15} = ps.addr64;
12321445 let Inst{18-16} = op;
1233 }
1234
1235 def TBUFFER_LOAD_FORMAT_XYZW_si : MTBUF_Real_si <3, TBUFFER_LOAD_FORMAT_XYZW>;
1236 def TBUFFER_STORE_FORMAT_X_si : MTBUF_Real_si <4, TBUFFER_STORE_FORMAT_X>;
1237 def TBUFFER_STORE_FORMAT_XY_si : MTBUF_Real_si <5, TBUFFER_STORE_FORMAT_XY>;
1238 def TBUFFER_STORE_FORMAT_XYZ_si : MTBUF_Real_si <6, TBUFFER_STORE_FORMAT_XYZ>;
1239 def TBUFFER_STORE_FORMAT_XYZW_si : MTBUF_Real_si <7, TBUFFER_STORE_FORMAT_XYZW>;
1240
1446 let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
1447 let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
1448 let Inst{31-26} = 0x3a; //encoding
1449 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
1450 let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
1451 let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
1452 let Inst{54} = !if(ps.has_slc, slc, ?);
1453 let Inst{55} = !if(ps.has_tfe, tfe, ?);
1454 let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
1455 }
1456
1457 multiclass MTBUF_Real_AllAddr_si op> {
1458 def _OFFSET_si : MTBUF_Real_si (NAME#"_OFFSET")>;
1459 def _ADDR64_si : MTBUF_Real_si (NAME#"_ADDR64")>;
1460 def _OFFEN_si : MTBUF_Real_si (NAME#"_OFFEN")>;
1461 def _IDXEN_si : MTBUF_Real_si (NAME#"_IDXEN")>;
1462 def _BOTHEN_si : MTBUF_Real_si (NAME#"_BOTHEN")>;
1463 }
1464
1465 defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>;
1466 defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>;
1467 //defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>;
1468 defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>;
1469 defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>;
1470 defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>;
1471 defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>;
1472 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
12411473
12421474 //===----------------------------------------------------------------------===//
12431475 // CI
13491581
13501582 class MTBUF_Real_vi op, MTBUF_Pseudo ps> :
13511583 MTBUF_Real,
1584 Enc64,
13521585 SIMCInstr {
13531586 let AssemblerPredicate=isVI;
13541587 let DecoderNamespace="VI";
13551588
1589 let Inst{11-0} = !if(ps.has_offset, offset, ?);
1590 let Inst{12} = ps.offen;
1591 let Inst{13} = ps.idxen;
1592 let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
13561593 let Inst{18-15} = op;
1357 }
1358
1359 def TBUFFER_LOAD_FORMAT_XYZW_vi : MTBUF_Real_vi <3, TBUFFER_LOAD_FORMAT_XYZW>;
1360 def TBUFFER_STORE_FORMAT_X_vi : MTBUF_Real_vi <4, TBUFFER_STORE_FORMAT_X>;
1361 def TBUFFER_STORE_FORMAT_XY_vi : MTBUF_Real_vi <5, TBUFFER_STORE_FORMAT_XY>;
1362 def TBUFFER_STORE_FORMAT_XYZ_vi : MTBUF_Real_vi <6, TBUFFER_STORE_FORMAT_XYZ>;
1363 def TBUFFER_STORE_FORMAT_XYZW_vi : MTBUF_Real_vi <7, TBUFFER_STORE_FORMAT_XYZW>;
1364
1594 let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
1595 let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
1596 let Inst{31-26} = 0x3a; //encoding
1597 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
1598 let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
1599 let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
1600 let Inst{54} = !if(ps.has_slc, slc, ?);
1601 let Inst{55} = !if(ps.has_tfe, tfe, ?);
1602 let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
1603 }
1604
1605 multiclass MTBUF_Real_AllAddr_vi op> {
1606 def _OFFSET_vi : MTBUF_Real_vi (NAME#"_OFFSET")>;
1607 def _OFFEN_vi : MTBUF_Real_vi (NAME#"_OFFEN")>;
1608 def _IDXEN_vi : MTBUF_Real_vi (NAME#"_IDXEN")>;
1609 def _BOTHEN_vi : MTBUF_Real_vi (NAME#"_BOTHEN")>;
1610 }
1611
1612 defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0>;
1613 defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <1>;
1614 //defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <2>;
1615 defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <3>;
1616 defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <4>;
1617 defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <5>;
1618 defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <6>;
1619 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <7>;
228228 raw_ostream &O) {
229229 if (MI->getOperand(OpNo).getImm())
230230 O << " vm";
231 }
232
233 void AMDGPUInstPrinter::printDFMT(const MCInst *MI, unsigned OpNo,
234 const MCSubtargetInfo &STI,
235 raw_ostream &O) {
236 if (MI->getOperand(OpNo).getImm()) {
237 O << " dfmt:";
238 printU8ImmDecOperand(MI, OpNo, O);
239 }
240 }
241
242 void AMDGPUInstPrinter::printNFMT(const MCInst *MI, unsigned OpNo,
243 const MCSubtargetInfo &STI,
244 raw_ostream &O) {
245 if (MI->getOperand(OpNo).getImm()) {
246 O << " nfmt:";
247 printU8ImmDecOperand(MI, OpNo, O);
248 }
231249 }
232250
233251 void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
8787 const MCSubtargetInfo &STI, raw_ostream &O);
8888 void printExpVM(const MCInst *MI, unsigned OpNo,
8989 const MCSubtargetInfo &STI, raw_ostream &O);
90 void printDFMT(const MCInst *MI, unsigned OpNo,
91 const MCSubtargetInfo &STI, raw_ostream &O);
92 void printNFMT(const MCInst *MI, unsigned OpNo,
93 const MCSubtargetInfo &STI, raw_ostream &O);
9094
9195 void printRegOperand(unsigned RegNo, raw_ostream &O);
9296 void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
32943294 SelectionDAG &DAG) const {
32953295 unsigned IntrID = cast(Op.getOperand(1))->getZExtValue();
32963296 SDLoc DL(Op);
3297 MachineFunction &MF = DAG.getMachineFunction();
3298
32973299 switch (IntrID) {
32983300 case Intrinsic::amdgcn_atomic_inc:
32993301 case Intrinsic::amdgcn_atomic_dec: {
33193321 Op.getOperand(5), // glc
33203322 Op.getOperand(6) // slc
33213323 };
3322 MachineFunction &MF = DAG.getMachineFunction();
33233324 SIMachineFunctionInfo *MFI = MF.getInfo();
33243325
33253326 unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
33333334 VT.getStoreSize(), VT.getStoreSize());
33343335
33353336 return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
3337 }
3338 case Intrinsic::amdgcn_tbuffer_load: {
3339 SDValue Ops[] = {
3340 Op.getOperand(0), // Chain
3341 Op.getOperand(2), // rsrc
3342 Op.getOperand(3), // vindex
3343 Op.getOperand(4), // voffset
3344 Op.getOperand(5), // soffset
3345 Op.getOperand(6), // offset
3346 Op.getOperand(7), // dfmt
3347 Op.getOperand(8), // nfmt
3348 Op.getOperand(9), // glc
3349 Op.getOperand(10) // slc
3350 };
3351
3352 EVT VT = Op.getOperand(2).getValueType();
3353
3354 MachineMemOperand *MMO = MF.getMachineMemOperand(
3355 MachinePointerInfo(),
3356 MachineMemOperand::MOLoad,
3357 VT.getStoreSize(), VT.getStoreSize());
3358 return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
3359 Op->getVTList(), Ops, VT, MMO);
33363360 }
33373361 // Basic sample.
33383362 case Intrinsic::amdgcn_image_sample:
33993423
34003424 SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
34013425 SelectionDAG &DAG) const {
3402 MachineFunction &MF = DAG.getMachineFunction();
34033426 SDLoc DL(Op);
34043427 SDValue Chain = Op.getOperand(0);
34053428 unsigned IntrinsicID = cast(Op.getOperand(1))->getZExtValue();
3429 MachineFunction &MF = DAG.getMachineFunction();
34063430
34073431 switch (IntrinsicID) {
34083432 case Intrinsic::amdgcn_exp: {
34693493 return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
34703494 Op.getOperand(2), Op.getOperand(3));
34713495 }
3496 case AMDGPUIntrinsic::AMDGPU_kill: {
3497 SDValue Src = Op.getOperand(2);
3498 if (const ConstantFPSDNode *K = dyn_cast(Src)) {
3499 if (!K->isNegative())
3500 return Chain;
3501
3502 SDValue NegOne = DAG.getTargetConstant(FloatToBits(-1.0f), DL, MVT::i32);
3503 return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, NegOne);
3504 }
3505
3506 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
3507 return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
3508 }
3509 case Intrinsic::amdgcn_s_barrier: {
3510 if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
3511 const SISubtarget &ST = MF.getSubtarget();
3512 unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
3513 if (WGSize <= ST.getWavefrontSize())
3514 return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
3515 Op.getOperand(0)), 0);
3516 }
3517 return SDValue();
3518 };
34723519 case AMDGPUIntrinsic::SI_tbuffer_store: {
3520
3521 // Extract vindex and voffset from vaddr as appropriate
3522 const ConstantSDNode *OffEn = cast(Op.getOperand(10));
3523 const ConstantSDNode *IdxEn = cast(Op.getOperand(11));
3524 SDValue VAddr = Op.getOperand(5);
3525
3526 SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
3527
3528 assert(!(OffEn->isOne() && IdxEn->isOne()) &&
3529 "Legacy intrinsic doesn't support both offset and index - use new version");
3530
3531 SDValue VIndex = IdxEn->isOne() ? VAddr : Zero;
3532 SDValue VOffset = OffEn->isOne() ? VAddr : Zero;
3533
3534 // Deal with the vec-3 case
3535 const ConstantSDNode *NumChannels = cast(Op.getOperand(4));
3536 auto Opcode = NumChannels->getZExtValue() == 3 ?
3537 AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT;
3538
3539 SDValue Ops[] = {
3540 Chain,
3541 Op.getOperand(3), // vdata
3542 Op.getOperand(2), // rsrc
3543 VIndex,
3544 VOffset,
3545 Op.getOperand(6), // soffset
3546 Op.getOperand(7), // inst_offset
3547 Op.getOperand(8), // dfmt
3548 Op.getOperand(9), // nfmt
3549 Op.getOperand(12), // glc
3550 Op.getOperand(13), // slc
3551 };
3552
3553 const ConstantSDNode *tfe = cast(Op.getOperand(14));
3554 assert(tfe->getZExtValue() == 0 &&
3555 "Value of tfe other than zero is unsupported");
3556
3557 EVT VT = Op.getOperand(3).getValueType();
3558 MachineMemOperand *MMO = MF.getMachineMemOperand(
3559 MachinePointerInfo(),
3560 MachineMemOperand::MOStore,
3561 VT.getStoreSize(), 4);
3562 return DAG.getMemIntrinsicNode(Opcode, DL,
3563 Op->getVTList(), Ops, VT, MMO);
3564 }
3565
3566 case Intrinsic::amdgcn_tbuffer_store: {
34733567 SDValue Ops[] = {
34743568 Chain,
3475 Op.getOperand(2),
3476 Op.getOperand(3),
3477 Op.getOperand(4),
3478 Op.getOperand(5),
3479 Op.getOperand(6),
3480 Op.getOperand(7),
3481 Op.getOperand(8),
3482 Op.getOperand(9),
3483 Op.getOperand(10),
3484 Op.getOperand(11),
3485 Op.getOperand(12),
3486 Op.getOperand(13),
3487 Op.getOperand(14)
3569 Op.getOperand(2), // vdata
3570 Op.getOperand(3), // rsrc
3571 Op.getOperand(4), // vindex
3572 Op.getOperand(5), // voffset
3573 Op.getOperand(6), // soffset
3574 Op.getOperand(7), // offset
3575 Op.getOperand(8), // dfmt
3576 Op.getOperand(9), // nfmt
3577 Op.getOperand(10), // glc
3578 Op.getOperand(11) // slc
34883579 };
3489
34903580 EVT VT = Op.getOperand(3).getValueType();
3491
34923581 MachineMemOperand *MMO = MF.getMachineMemOperand(
34933582 MachinePointerInfo(),
34943583 MachineMemOperand::MOStore,
34963585 return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
34973586 Op->getVTList(), Ops, VT, MMO);
34983587 }
3499 case AMDGPUIntrinsic::AMDGPU_kill: {
3500 SDValue Src = Op.getOperand(2);
3501 if (const ConstantFPSDNode *K = dyn_cast(Src)) {
3502 if (!K->isNegative())
3503 return Chain;
3504
3505 SDValue NegOne = DAG.getTargetConstant(FloatToBits(-1.0f), DL, MVT::i32);
3506 return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, NegOne);
3507 }
3508
3509 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
3510 return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
3511 }
3512 case Intrinsic::amdgcn_s_barrier: {
3513 if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
3514 const MachineFunction &MF = DAG.getMachineFunction();
3515 const SISubtarget &ST = MF.getSubtarget();
3516 unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
3517 if (WGSize <= ST.getWavefrontSize())
3518 return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
3519 Op.getOperand(0)), 0);
3520 }
3521 return SDValue();
3522 };
3588
35233589 default:
35243590 return Op;
35253591 }
4040 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
4141 >;
4242
43 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
44 SDTypeProfile<0, 13,
45 [SDTCisVT<0, v4i32>, // rsrc(SGPR)
46 SDTCisVT<1, iAny>, // vdata(VGPR)
47 SDTCisVT<2, i32>, // num_channels(imm)
48 SDTCisVT<3, i32>, // vaddr(VGPR)
43 def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT",
44 SDTypeProfile<1, 9,
45 [ // vdata
46 SDTCisVT<1, v4i32>, // rsrc
47 SDTCisVT<2, i32>, // vindex(VGPR)
48 SDTCisVT<3, i32>, // voffset(VGPR)
4949 SDTCisVT<4, i32>, // soffset(SGPR)
50 SDTCisVT<5, i32>, // inst_offset(imm)
50 SDTCisVT<5, i32>, // offset(imm)
5151 SDTCisVT<6, i32>, // dfmt(imm)
5252 SDTCisVT<7, i32>, // nfmt(imm)
53 SDTCisVT<8, i32>, // offen(imm)
54 SDTCisVT<9, i32>, // idxen(imm)
55 SDTCisVT<10, i32>, // glc(imm)
56 SDTCisVT<11, i32>, // slc(imm)
57 SDTCisVT<12, i32> // tfe(imm)
53 SDTCisVT<8, i32>, // glc(imm)
54 SDTCisVT<9, i32> // slc(imm)
5855 ]>,
59 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]
56 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
6057 >;
58
59 def SDTtbuffer_store : SDTypeProfile<0, 10,
60 [ // vdata
61 SDTCisVT<1, v4i32>, // rsrc
62 SDTCisVT<2, i32>, // vindex(VGPR)
63 SDTCisVT<3, i32>, // voffset(VGPR)
64 SDTCisVT<4, i32>, // soffset(SGPR)
65 SDTCisVT<5, i32>, // offset(imm)
66 SDTCisVT<6, i32>, // dfmt(imm)
67 SDTCisVT<7, i32>, // nfmt(imm)
68 SDTCisVT<8, i32>, // glc(imm)
69 SDTCisVT<9, i32> // slc(imm)
70 ]>;
71
72 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
73 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
74 def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3",
75 SDTtbuffer_store,
76 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
6177
6278 def SDTBufferLoad : SDTypeProfile<1, 5,
6379 [ // vdata
545561 def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
546562 def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
547563 def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
564
565 def DFMT : NamedOperandU8<"DFMT", NamedMatchClass<"DFMT">>;
566 def NFMT : NamedOperandU8<"NFMT", NamedMatchClass<"NFMT">>;
548567
549568 def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
550569
11 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
22
33 ;CHECK-LABEL: {{^}}test1:
4 ;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
4 ;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32 glc slc
55 define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) {
66 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
77 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
1010 ret void
1111 }
1212
13 ;CHECK-LABEL: {{^}}test1_idx:
14 ;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offset:32 glc slc
; Legacy llvm.SI.tbuffer.store of a 4-channel value where %vaddr is used as an
; index (the CHECK above expects the idxen addressing form), with glc/slc set.
15 define amdgpu_vs void @test1_idx(i32 %a1, i32 %vaddr) {
16 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
17 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
18 i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1,
19 i32 1, i32 0)
20 ret void
21 }
22
23 ;CHECK-LABEL: {{^}}test1_scalar_offset:
24 ;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, {{s[0-9]+}} idxen offset:32 glc slc
; Same as test1_idx but the soffset operand comes from an SGPR argument
; (%soffset, inreg) instead of a literal 0.
25 define amdgpu_vs void @test1_scalar_offset(i32 %a1, i32 %vaddr, i32 inreg %soffset) {
26 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
27 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
28 i32 4, i32 %vaddr, i32 %soffset, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1,
29 i32 1, i32 0)
30 ret void
31 }
32
33 ;CHECK-LABEL: {{^}}test1_no_glc_slc:
34 ;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32
; Variant with the glc/slc immediates cleared; the CHECK above expects the
; emitted tbuffer_store without glc/slc modifiers.
35 define amdgpu_vs void @test1_no_glc_slc(i32 %a1, i32 %vaddr) {
36 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
37 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
38 i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 0,
39 i32 0, i32 0)
40 ret void
41 }
42
1343 ;CHECK-LABEL: {{^}}test2:
14 ;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
44 ;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 offen offset:24 glc slc
1545 define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) {
1646 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
1747 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
2151 }
2252
2353 ;CHECK-LABEL: {{^}}test3:
24 ;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
54 ;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:11, nfmt:4, 0 offen offset:16 glc slc
2555 define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) {
2656 %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
2757 call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
3161 }
3262
3363 ;CHECK-LABEL: {{^}}test4:
34 ;CHECK: tbuffer_store_format_x {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
64 ;CHECK: tbuffer_store_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:4, nfmt:4, 0 offen offset:8 glc slc
3565 define amdgpu_vs void @test4(i32 %vdata, i32 %vaddr) {
3666 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
3767 i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
0 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s
2
3 ; GCN-LABEL: {{^}}tbuffer_load:
4 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0
5 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:3, 0 glc
6 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 slc
7 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0
8 ; GCN: s_waitcnt
; Four llvm.amdgcn.tbuffer.load calls (rsrc, vindex, voffset, soffset, offset,
; dfmt, nfmt, glc, slc) that differ only in dfmt/nfmt and the glc/slc flags,
; plus one f32-typed overload; results are packed into a 4-member struct return.
9 define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
10 main_body:
11 %vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
12 %vdata_glc = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 3, i1 1, i1 0)
13 %vdata_slc = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 1)
14 %vdata_f32 = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 0)
15 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
16 %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
17 %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
18 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
19 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
20 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
21 %r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
22 ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
23 }
24
25 ; GCN-LABEL: {{^}}tbuffer_load_immoffs:
26 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offset:42
; A single load whose only non-zero address operand is the immediate
; offset (42); the CHECK above expects it folded into offset:42.
27 define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
28 main_body:
29 %vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 42, i32 14, i32 4, i1 0, i1 0)
30 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
31 ret <4 x float> %vdata.f
32 }
33
34 ; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
35 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:2, 61 offset:4095
36 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:3, {{s[0-9]+}} offset:73
37 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, {{s[0-9]+}} offset:1
38 ; GCN: s_waitcnt
; Exercises soffset/offset combinations: a literal soffset (61) with the
; maximum 12-bit immediate offset (4095), and a scalar-register soffset
; (%soffs) with small immediate offsets (73 and 1).
39 define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
40 %vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 61, i32 4095, i32 15, i32 2, i1 0, i1 0)
41 %vdata_glc = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 %soffs, i32 73, i32 14, i32 3, i1 0, i1 0)
42 %vdata_slc = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 %soffs, i32 1, i32 13, i32 4, i1 0, i1 0)
43 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
44 %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
45 %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
46 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
47 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
48 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
49 ret {<4 x float>, <4 x float>, <4 x float>} %r2
50 }
51
52 ; GCN-LABEL: {{^}}tbuffer_load_idx:
53 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen
; vindex comes from a VGPR argument; the CHECK above expects the idxen form.
54 define amdgpu_vs <4 x float> @tbuffer_load_idx(<4 x i32> inreg, i32 %vindex) {
55 main_body:
56 %vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
57 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
58 ret <4 x float> %vdata.f
59 }
60
61 ; GCN-LABEL: {{^}}tbuffer_load_ofs:
62 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen
; voffset comes from a VGPR argument; the CHECK above expects the offen form.
63 define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
64 main_body:
65 %vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %voffs, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
66 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
67 ret <4 x float> %vdata.f
68 }
69
70 ; GCN-LABEL: {{^}}tbuffer_load_ofs_imm:
71 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:52
; Register voffset combined with an immediate offset (52); expected to select
; to offen plus offset:52 per the CHECK above.
72 define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
73 main_body:
74 %vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %voffs, i32 0, i32 52, i32 14, i32 4, i1 0, i1 0)
75 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
76 ret <4 x float> %vdata.f
77 }
78
79 ; GCN-LABEL: {{^}}tbuffer_load_both:
80 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen
; Both vindex and voffset are register operands; expected idxen+offen form.
81 define amdgpu_vs <4 x float> @tbuffer_load_both(<4 x i32> inreg, i32 %vindex, i32 %voffs) {
82 main_body:
83 %vdata = call <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 %voffs, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
84 %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
85 ret <4 x float> %vdata.f
86 }
87
88
89 ; GCN-LABEL: {{^}}buffer_load_xy:
90 ; GCN: tbuffer_load_format_xy {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0
; Two-channel (v2i32) overload of the load intrinsic.
91 define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
92 %vdata = call <2 x i32> @llvm.amdgcn.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 13, i32 4, i1 0, i1 0)
93 %vdata.f = bitcast <2 x i32> %vdata to <2 x float>
94 ret <2 x float> %vdata.f
95 }
96
97 ; GCN-LABEL: {{^}}buffer_load_x:
98 ; GCN: tbuffer_load_format_x {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0
; Single-channel (scalar i32) overload of the load intrinsic.
99 define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
100 %vdata = call i32 @llvm.amdgcn.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 13, i32 4, i1 0, i1 0)
101 %vdata.f = bitcast i32 %vdata to float
102 ret float %vdata.f
103 }
104
105 declare i32 @llvm.amdgcn.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
106 declare <2 x i32> @llvm.amdgcn.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
107 declare <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
108 declare <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
0 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s
2
3 ; GCN-LABEL: {{^}}tbuffer_store:
4 ; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:12, nfmt:2, 0
5 ; GCN: tbuffer_store_format_xyzw v[4:7], off, s[0:3], dfmt:13, nfmt:3, 0 glc
6 ; GCN: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0 slc
7 ; GCN: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0
; Four llvm.amdgcn.tbuffer.store calls (vdata, rsrc, vindex, voffset, soffset,
; offset, dfmt, nfmt, glc, slc) differing only in dfmt/nfmt and the glc/slc
; flags, plus one f32-typed overload.
8 define amdgpu_ps void @tbuffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
9 main_body:
10 %in1 = bitcast <4 x float> %1 to <4 x i32>
11 %in2 = bitcast <4 x float> %2 to <4 x i32>
12 %in3 = bitcast <4 x float> %3 to <4 x i32>
13 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 12, i32 2, i1 0, i1 0)
14 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 13, i32 3, i1 1, i1 0)
15 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 4, i1 0, i1 1)
16 call void @llvm.amdgcn.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 4, i1 0, i1 0)
17 ret void
18 }
19
20 ; GCN-LABEL: {{^}}tbuffer_store_immoffs:
21 ; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, 0 offset:42
; Store whose only non-zero address operand is the immediate offset (42).
22 define amdgpu_ps void @tbuffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
23 main_body:
24 %in1 = bitcast <4 x float> %1 to <4 x i32>
25 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 42, i32 5, i32 7, i1 0, i1 0)
26 ret void
27 }
28
29 ; GCN-LABEL: {{^}}tbuffer_store_scalar_and_imm_offs:
30 ; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} offset:42
; Combines a scalar-register soffset (%soffset, inreg) with an immediate
; offset (42) in the same store.
31 define amdgpu_ps void @tbuffer_store_scalar_and_imm_offs(<4 x i32> inreg, <4 x float> %vdata, i32 inreg %soffset) {
32 main_body:
33 %in1 = bitcast <4 x float> %vdata to <4 x i32>
34 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 %soffset, i32 42, i32 5, i32 7, i1 0, i1 0)
35 ret void
36 }
37
38 ; GCN-LABEL: {{^}}buffer_store_idx:
39 ; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:2, 0 idxen
; vindex from a VGPR argument; the CHECK above expects the idxen form.
40 define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex) {
41 main_body:
42 %in1 = bitcast <4 x float> %vdata to <4 x i32>
43 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 0, i32 15, i32 2, i1 0, i1 0)
44 ret void
45 }
46
47 ; GCN-LABEL: {{^}}buffer_store_ofs:
48 ; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:3, nfmt:7, 0 offen
; voffset from a VGPR argument; the CHECK above expects the offen form.
49 define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float> %vdata, i32 %voffset) {
50 main_body:
51 %in1 = bitcast <4 x float> %vdata to <4 x i32>
52 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 %voffset, i32 0, i32 0, i32 3, i32 7, i1 0, i1 0)
53 ret void
54 }
55
56 ; GCN-LABEL: {{^}}buffer_store_both:
57 ; GCN: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], dfmt:6, nfmt:4, 0 idxen offen
; Both vindex and voffset are register operands; expected idxen+offen form.
58 define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex, i32 %voffset) {
59 main_body:
60 %in1 = bitcast <4 x float> %vdata to <4 x i32>
61 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 6, i32 4, i1 0, i1 0)
62 ret void
63 }
64
65 ; Ideally, the register allocator would avoid the wait here
66 ;
67 ; GCN-LABEL: {{^}}buffer_store_wait:
68 ; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:3, 0 idxen
69 ; GCN: s_waitcnt expcnt(0)
70 ; GCN: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
71 ; GCN: s_waitcnt vmcnt(0)
72 ; GCN: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], dfmt:16, nfmt:2, 0 idxen
; Store -> load -> store chain through the same resource; the CHECKs above
; expect s_waitcnt instructions between the memory operations.
73 define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex.1, i32 %vindex.2, i32 %vindex.3) {
74 main_body:
75 %in1 = bitcast <4 x float> %vdata to <4 x i32>
76 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 %vindex.1, i32 0, i32 0, i32 0, i32 15, i32 3, i1 0, i1 0)
77 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %vindex.2, i32 0, i1 0, i1 0)
78 %data.i = bitcast <4 x float> %data to <4 x i32>
79 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %data.i, <4 x i32> %0, i32 %vindex.3, i32 0, i32 0, i32 0, i32 16, i32 2, i1 0, i1 0)
80 ret void
81 }
82
83 ; GCN-LABEL: {{^}}buffer_store_x1:
84 ; GCN: tbuffer_store_format_x v0, v1, s[0:3], dfmt:13, nfmt:7, 0 idxen
; Single-channel (scalar i32) overload of the store intrinsic.
85 define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
86 main_body:
87 %data.i = bitcast float %data to i32
88 call void @llvm.amdgcn.tbuffer.store.i32(i32 %data.i, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 13, i32 7, i1 0, i1 0)
89 ret void
90 }
91
92 ; GCN-LABEL: {{^}}buffer_store_x2:
93 ; GCN: tbuffer_store_format_xy v[0:1], v2, s[0:3], dfmt:1, nfmt:2, 0 idxen
; Two-channel (v2i32) overload of the store intrinsic.
94 define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %vindex) {
95 main_body:
96 %data.i = bitcast <2 x float> %data to <2 x i32>
97 call void @llvm.amdgcn.tbuffer.store.v2i32(<2 x i32> %data.i, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0)
98 ret void
99 }
100
101 declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
102 declare void @llvm.amdgcn.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
103 declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
104 declare void @llvm.amdgcn.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
105 declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
106
107 attributes #0 = { nounwind }
108 attributes #1 = { nounwind readonly }
109
2525 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp7, i32 1
2626 %tmp10 = insertelement <4 x i32> %tmp9, i32 undef, i32 2
2727 %tmp11 = insertelement <4 x i32> %tmp10, i32 undef, i32 3
28 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %tmp11, i32 4, i32 undef, i32 %arg, i32 0, i32 14, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
28 call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %tmp11, <4 x i32> undef, i32 undef, i32 0, i32 %arg, i32 0, i32 14, i32 4, i1 1, i1 1)
2929 ret void
3030 }
3131
3232 ; Function Attrs: nounwind
33 declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
33 declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
3434
3535 attributes #0 = { nounwind }
1010
1111 store i32 %v, i32 addrspace(3)* %p0
1212
13 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %v, i32 1, i32 undef, i32 undef, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
13 call void @llvm.amdgcn.tbuffer.store.i32(i32 %v, <4 x i32> undef, i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i1 1, i1 0)
1414
1515 %w = load i32, i32 addrspace(3)* %p0
1616 store i32 %w, i32 addrspace(3)* %p1
1717 ret void
1818 }
1919
20 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
20 declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
2121
2222 attributes #0 = { nounwind }
6161 %tmp2 = shl i32 %6, 2
6262 %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
6363 %tmp4 = add i32 %6, 16
64 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp3, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
64 %tmp1.4xi32 = bitcast <16 x i8> %tmp1 to <4 x i32>
65 call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
6566 ret void
6667 }
6768
7980 %tmp2 = shl i32 %6, 2
8081 %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
8182 %tmp4 = add i32 %6, 16
82 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp3, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
83 %tmp1.4xi32 = bitcast <16 x i8> %tmp1 to <4 x i32>
84 call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
8385 ret void
8486 }
8587
174176 }
175177
176178 declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0
177 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
179 declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
178180
179181 attributes #0 = { nounwind readonly }
2424 %array_vector10 = insertelement <4 x float> %array_vector9, float 0.000000e+00, i32 2
2525 %array_vector11 = insertelement <4 x float> %array_vector10, float undef, i32 3
2626 %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> undef, i32 undef, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
27 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %tmp3, i32 1, i32 36, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
27 call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 36, i32 4, i32 4, i1 1, i1 1)
2828 %bc = bitcast <4 x float> %array_vector3 to <4 x i32>
2929 %tmp4 = extractelement <4 x i32> %bc, i32 undef
30 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %tmp4, i32 1, i32 48, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
30 call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp4, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 48, i32 4, i32 4, i1 1, i1 1)
3131 %bc49 = bitcast <4 x float> %array_vector11 to <4 x i32>
3232 %tmp5 = extractelement <4 x i32> %bc49, i32 undef
33 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %tmp5, i32 1, i32 72, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
33 call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp5, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 72, i32 4, i32 4, i1 1, i1 1)
3434 %array_vector21 = insertelement <4 x float> , float %tmp, i32 1
3535 %array_vector22 = insertelement <4 x float> %array_vector21, float undef, i32 2
3636 %array_vector23 = insertelement <4 x float> %array_vector22, float undef, i32 3
37 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 undef, i32 1, i32 28, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
37 call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 28, i32 4, i32 4, i1 1, i1 1)
3838 %bc52 = bitcast <4 x float> %array_vector23 to <4 x i32>
3939 %tmp6 = extractelement <4 x i32> %bc52, i32 undef
40 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %tmp6, i32 1, i32 64, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
41 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 undef, i32 1, i32 20, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
42 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 undef, i32 1, i32 56, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
43 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 undef, i32 1, i32 92, i32 %arg, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
40 call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp6, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 64, i32 4, i32 4, i1 1, i1 1)
41 call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 20, i32 4, i32 4, i1 1, i1 1)
42 call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 56, i32 4, i32 4, i1 1, i1 1)
43 call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 92, i32 4, i32 4, i1 1, i1 1)
4444 ret void
4545 }
4646
4747 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
4848 declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #2
49 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #3
49 declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #3
5050
5151 attributes #0 = { nounwind "target-cpu"="tonga" }
5252 attributes #1 = { nounwind readnone }
0 ; RUN: llc -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
11
2 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
3 declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
2 declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
3 declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
44 declare void @llvm.amdgcn.s.barrier() #1
55 declare i32 @llvm.amdgcn.workitem.id.x() #2
66
257257 ; %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
258258
259259 ; %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
260 ; call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
261 ; i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
262 ; i32 1, i32 0)
260 ; call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %vdata, <4 x i32> undef,
261 ; i32 %vaddr, i32 0, i32 0, i32 32, i32 14, i32 4, i1 1, i1 1)
263262
264263 ; %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
265264
0 // RUN: llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICI %s
1 // RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s
2 // RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
4 //===----------------------------------------------------------------------===//
5 // Test for dfmt and nfmt (tbuffer only)
6 //
7 // Each instruction is assembled for SI/CI and for VI; the expected encodings
8 // differ between the two generations (see the SICI vs VI check lines).
//===----------------------------------------------------------------------===//

// x / xy / xyzw load variants with explicit dfmt/nfmt and SGPR soffset.
8 tbuffer_load_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
9 // SICI: tbuffer_load_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x78,0xe9,0x00,0x01,0x01,0x01]
10 // VI: tbuffer_load_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x78,0xe9,0x00,0x01,0x01,0x01]
11
12 tbuffer_load_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
13 // SICI: tbuffer_load_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x79,0xe9,0x00,0x01,0x01,0x01]
14 // VI: tbuffer_load_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x78,0xe9,0x00,0x01,0x01,0x01]
15
16 tbuffer_load_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1
17 // SICI: tbuffer_load_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7b,0xe9,0x00,0x01,0x01,0x01]
18 // VI: tbuffer_load_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x79,0xe9,0x00,0x01,0x01,0x01]
19
// Matching store variants.
20 tbuffer_store_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
21 // SICI: tbuffer_store_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7c,0xe9,0x00,0x01,0x01,0x01]
22 // VI: tbuffer_store_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7a,0xe9,0x00,0x01,0x01,0x01]
23
24 tbuffer_store_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
25 // SICI: tbuffer_store_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7d,0xe9,0x00,0x01,0x01,0x01]
26 // VI: tbuffer_store_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7a,0xe9,0x00,0x01,0x01,0x01]
27
28 tbuffer_store_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1
29 // SICI: tbuffer_store_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7f,0xe9,0x00,0x01,0x01,0x01]
30 // VI: tbuffer_store_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x01,0x01]
31
// Same store but with trap-temp (ttmp) registers for rsrc and soffset.
32 tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1
33 // SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 ; encoding: [0x00,0x00,0x7f,0xe9,0x00,0x01,0x1d,0x71]
34 // VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x1d,0x71]
35
0 # RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI
1
# Round-trip check for the VI tbuffer encodings: each byte sequence below must
# disassemble to the instruction named in the preceding VI: check line.
2 # VI: tbuffer_load_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x78,0xe9,0x00,0x01,0x01,0x01]
3 0x00 0x00 0x78 0xe9 0x00 0x01 0x01 0x01
4
5 # VI: tbuffer_load_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x78,0xe9,0x00,0x01,0x01,0x01]
6 0x00 0x80 0x78 0xe9 0x00 0x01 0x01 0x01
7
8 # VI: tbuffer_load_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x79,0xe9,0x00,0x01,0x01,0x01]
9 0x00 0x80 0x79 0xe9 0x00 0x01 0x01 0x01
10
11 # VI: tbuffer_store_format_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7a,0xe9,0x00,0x01,0x01,0x01]
12 0x00 0x00 0x7a 0xe9 0x00 0x01 0x01 0x01
13
14 # VI: tbuffer_store_format_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7a,0xe9,0x00,0x01,0x01,0x01]
15 0x00 0x80 0x7a 0xe9 0x00 0x01 0x01 0x01
16
17 # VI: tbuffer_store_format_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x01,0x01]
18 0x00 0x80 0x7b 0xe9 0x00 0x01 0x01 0x01
19
20 # VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x1d,0x71]
21 0x00 0x80 0x7b 0xe9 0x00 0x01 0x1d 0x71