llvm.org GIT mirror llvm / f3487e8
Fixed/Recommitted r267733 "[AMDGPU][llvm-mc] Add support of TTMP quads. Rework M0 exclusion for SMRD." Previously reverted by r267752. r267733 review: Differential Revision: http://reviews.llvm.org/D19342 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268066 91177308-0d34-0410-b5e6-96231b3b80d8 Artem Tamazov 4 years ago
9 changed file(s) with 148 addition(s) and 20 deletion(s). Raw diff Collapse all Expand all
643643 default: return -1;
644644 case 1: return AMDGPU::TTMP_32RegClassID;
645645 case 2: return AMDGPU::TTMP_64RegClassID;
646 case 4: return AMDGPU::TTMP_128RegClassID;
646647 }
647648 } else if (Is == IS_SGPR) {
648649 switch (RegWidth) {
649650 default: return -1;
650651 case 1: return AMDGPU::SGPR_32RegClassID;
651652 case 2: return AMDGPU::SGPR_64RegClassID;
652 case 4: return AMDGPU::SReg_128RegClassID;
653 case 4: return AMDGPU::SGPR_128RegClassID;
653654 case 8: return AMDGPU::SReg_256RegClassID;
654655 case 16: return AMDGPU::SReg_512RegClassID;
655656 }
6565 DECODE_OPERAND(VReg_96)
6666 DECODE_OPERAND(VReg_128)
6767
68 DECODE_OPERAND(SGPR_32)
6968 DECODE_OPERAND(SReg_32)
69 DECODE_OPERAND(SReg_32_XM0)
7070 DECODE_OPERAND(SReg_64)
7171 DECODE_OPERAND(SReg_128)
7272 DECODE_OPERAND(SReg_256)
236236 return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
237237 }
238238
239 MCOperand AMDGPUDisassembler::decodeOperand_SGPR_32(unsigned Val) const {
240 return createSRegOperand(AMDGPU::SGPR_32RegClassID, Val);
241 }
242
243239 MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
244240 // table-gen generated disassembler doesn't care about operand types
245241 // leaving only registry class so SSrc_32 operand turns into SReg_32
246242 // and therefore we accept immediates and literals here as well
247243 return decodeSrcOp(OP32, Val);
244 }
245
246 MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0(unsigned Val) const {
247 // SReg_32_XM0 is SReg_32 without M0
248 return decodeOperand_SReg_32(Val);
248249 }
249250
250251 MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
6161 MCOperand decodeOperand_VReg_96(unsigned Val) const;
6262 MCOperand decodeOperand_VReg_128(unsigned Val) const;
6363
64 MCOperand decodeOperand_SGPR_32(unsigned Val) const;
6564 MCOperand decodeOperand_SReg_32(unsigned Val) const;
65 MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const;
6666 MCOperand decodeOperand_SReg_64(unsigned Val) const;
6767 MCOperand decodeOperand_SReg_128(unsigned Val) const;
6868 MCOperand decodeOperand_SReg_256(unsigned Val) const;
249249 } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
250250 Type = "v";
251251 NumRegs = 4;
252 } else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
252 } else if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(reg)) {
253253 Type = "s";
254 NumRegs = 4;
255 } else if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(reg)) {
256 Type = "ttmp";
254257 NumRegs = 4;
255258 } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
256259 Type = "v";
5959 // SMRD Instructions
6060 //===----------------------------------------------------------------------===//
6161
62 // We are using the SGPR_32 and not the SReg_32 register class for 32-bit
63 // SMRD instructions, because the SGPR_32 register class does not include M0
62 // We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
63 // SMRD instructions, because the SReg_32_XM0 register class does not include M0
6464 // and writing to M0 from an SMRD instruction will hang the GPU.
65 defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SGPR_32>;
65 defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SReg_32_XM0>;
6666 defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
6767 defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
6868 defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
6969 defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;
7070
7171 defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
72 smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32
72 smrd<0x08>, "s_buffer_load_dword", SReg_128, SReg_32_XM0
7373 >;
7474
7575 defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
20862086 }
20872087
20882088 // It's unclear whether you can use M0 as the output of v_readlane_b32
2089 // instructions, so use SGPR_32 register class for spills to prevent
2089 // instructions, so use SReg_32_XM0 register class for spills to prevent
20902090 // this from happening.
2091 defm SI_SPILL_S32 : SI_SPILL_SGPR <SGPR_32>;
2091 defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32_XM0>;
20922092 defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
20932093 defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
20942094 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
34303430 def : Pat <
34313431 (i64 (sext i32:$src)),
34323432 (REG_SEQUENCE SReg_64, $src, sub0,
3433 (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SGPR_32)), sub1)
3433 (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SReg_32_XM0)), sub1)
34343434 >;
34353435
34363436 def : Pat <
131131 (add (decimate (shl SGPR_32, 1), 2))]>;
132132
133133 // SGPR 128-bit registers
134 def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
134 def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
135135 [(add (decimate SGPR_32, 4)),
136136 (add (decimate (shl SGPR_32, 1), 4)),
137137 (add (decimate (shl SGPR_32, 2), 4)),
254254 TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
255255 >;
256256
257 // Subset of SReg_32 without M0 for SMRD instructions and alike.
258 // See comments in SIInstructions.td for more info.
259 def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
260 (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
261 TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
262 >;
263
257264 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;
258265
259266 def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
264271 (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)
265272 >;
266273
267 def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
268 // Requires 2 s_mov_b64 to copy
269 let CopyCost = 2;
270 }
274 // Requires 2 s_mov_b64 to copy
275 let CopyCost = 2 in {
276
277 def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)>;
278
279 def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
280 let isAllocatable = 0;
281 }
282
283 def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)>;
284
285 } // End CopyCost = 2
271286
272287 def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
273288 // Requires 4 s_mov_b64 to copy
1717 // SICI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
1818 // VI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
1919
20 buffer_load_dword v1, off, ttmp[4:7], s1
21 // SICI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01]
22 // VI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01]
23
2024 buffer_load_dword v1, off, s[4:7], s1 offset:4
2125 // SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
2226 // VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
4145 // SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
4246 // VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01]
4347
48 buffer_load_dword v1, off, ttmp[4:7], s1 offset:4 glc slc tfe
49 // SICI: buffer_load_dword v1, off, ttmp[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xdd,0x01]
50 // VI: buffer_load_dword v1, off, ttmp[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x9d,0x01]
51
4452 //===----------------------------------------------------------------------===//
4553 // load - vgpr offset
4654 //===----------------------------------------------------------------------===//
7381 // SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
7482 // VI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x52,0xe0,0x02,0x01,0x81,0x01]
7583
84 buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe
85 // SICI: buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xdd,0x01]
86 // VI: buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x52,0xe0,0x02,0x01,0x9d,0x01]
87
7688 //===----------------------------------------------------------------------===//
7789 // load - vgpr index
7890 //===----------------------------------------------------------------------===//
105117 // SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
106118 // VI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x52,0xe0,0x02,0x01,0x81,0x01]
107119
120 buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe
121 // SICI: buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xdd,0x01]
122 // VI: buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x52,0xe0,0x02,0x01,0x9d,0x01]
123
108124 //===----------------------------------------------------------------------===//
109125 // load - vgpr index and offset
110126 //===----------------------------------------------------------------------===//
137153 // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
138154 // VI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x81,0x01]
139155
156 buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe
157 // SICI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xdd,0x71]
158 // VI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x9d,0x71]
159
140160 //===----------------------------------------------------------------------===//
141161 // load - addr64
142162 //===----------------------------------------------------------------------===//
167187
168188 buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe
169189 // SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
190 // NOVI: error: instruction not supported on this GPU
191
192 buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe
193 // SICI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xdd,0x71]
170194 // NOVI: error: instruction not supported on this GPU
171195
172196 //===----------------------------------------------------------------------===//
201225 // SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
202226 // VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01]
203227
228 buffer_store_dword v1, off, ttmp[4:7], ttmp1 offset:4 glc slc tfe
229 // SICI: buffer_store_dword v1, off, ttmp[4:7], ttmp1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xdd,0x71]
230 // VI: buffer_store_dword v1, off, ttmp[4:7], ttmp1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x9d,0x71]
231
204232 //===----------------------------------------------------------------------===//
205233 // store - vgpr offset
206234 //===----------------------------------------------------------------------===//
233261 // SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
234262 // VI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x72,0xe0,0x02,0x01,0x81,0x01]
235263
264 buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe
265 // SICI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xdd,0x71]
266 // VI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x72,0xe0,0x02,0x01,0x9d,0x71]
267
236268 //===----------------------------------------------------------------------===//
237269 // store - vgpr index
238270 //===----------------------------------------------------------------------===//
265297 // SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
266298 // VI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x72,0xe0,0x02,0x01,0x81,0x01]
267299
300 buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe
301 // SICI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xdd,0x71]
302 // VI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x72,0xe0,0x02,0x01,0x9d,0x71]
303
268304 //===----------------------------------------------------------------------===//
269305 // store - vgpr index and offset
270306 //===----------------------------------------------------------------------===//
297333 // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
298334 // VI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x81,0x01]
299335
336 buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe
337 // SICI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xdd,0x71]
338 // VI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x9d,0x71]
339
300340 //===----------------------------------------------------------------------===//
301341 // store - addr64
302342 //===----------------------------------------------------------------------===//
327367
328368 buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe
329369 // SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
370 // NOVI: error: instruction not supported on this GPU
371
372 buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe
373 // SICI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xdd,0x71]
330374 // NOVI: error: instruction not supported on this GPU
331375
332376 //===----------------------------------------------------------------------===//
365409 // SICI: buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
366410 // VI: buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
367411
412 buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1
413 // SICI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
414 // VI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
415
368416 buffer_load_ubyte v1, off, s[4:7], s1
369417 // SICI: buffer_load_ubyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
370418 // VI: buffer_load_ubyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01]
371419
420 buffer_load_ubyte v1, off, ttmp[4:7], ttmp1
421 // SICI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71]
422 // VI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71]
423
372424 buffer_load_sbyte v1, off, s[4:7], s1
373425 // SICI: buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
374426 // VI: buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01]
385437 // SICI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
386438 // VI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
387439
440 buffer_load_dword v1, off, ttmp[4:7], ttmp1
441 // SICI: buffer_load_dword v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x71]
442 // VI: buffer_load_dword v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x71]
443
388444 buffer_load_dwordx2 v[1:2], off, s[4:7], s1
389445 // SICI: buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
390446 // VI: buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01]
393449 // SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
394450 // VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
395451
452 buffer_load_dwordx4 v[1:4], off, ttmp[4:7], ttmp1
453 // SICI: buffer_load_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x1d,0x71]
454 // VI: buffer_load_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x1d,0x71]
455
396456 buffer_store_byte v1, off, s[4:7], s1
397457 // SICI: buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
398458 // VI: buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
399459
460 buffer_store_byte v1, off, ttmp[4:7], ttmp1
461 // SICI: buffer_store_byte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x1d,0x71]
462 // VI: buffer_store_byte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x1d,0x71]
463
400464 buffer_store_short v1, off, s[4:7], s1
401465 // SICI: buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
402466 // VI: buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
412476 buffer_store_dwordx4 v[1:4], off, s[4:7], s1
413477 // SICI: buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
414478 // VI: buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01]
479
480 buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1
481 // SICI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71]
482 // VI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71]
415483
416484 //===----------------------------------------------------------------------===//
417485 // Cache invalidation
5252 buffer_load_dwordx4 [v1,v2,v3,v4], off, [s4,s5,s6,s7], s1
5353 // SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
5454 // VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
55
56 buffer_load_dword v1, off, [ttmp4,ttmp5,ttmp6,ttmp7], s1
57 // SICI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01]
58 // VI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01]
59
60 buffer_store_format_xyzw v[1:4], off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
61 // SICI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
62 // VI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
63
64 buffer_load_ubyte v1, off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
65 // SICI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71]
66 // VI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71]
67
68 buffer_store_dwordx4 v[1:4], off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1
69 // SICI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71]
70 // VI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71]
71
72 s_load_dwordx4 [ttmp4,ttmp5,ttmp6,ttmp7], [ttmp2,ttmp3], ttmp4
73 // SICI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x74,0x72,0xba,0xc0]
74 // VI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x39,0x1d,0x08,0xc0,0x74,0x00,0x00,0x00]
75
76 s_buffer_load_dword ttmp1, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4
77 // SICI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x74,0xf4,0x38,0xc2]
78 // VI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x7a,0x1c,0x20,0xc0,0x74,0x00,0x00,0x00]
79
80 s_buffer_load_dwordx4 [ttmp8,ttmp9,ttmp10,ttmp11], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4
81 // SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2]
82 // VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00]
5151 // GCN: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
5252 // VI: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x01,0x01,0x08,0xc0,0x04,0x00,0x00,0x00]
5353
54 s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4
55 // GCN: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x74,0x72,0xba,0xc0]
56 // VI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x39,0x1d,0x08,0xc0,0x74,0x00,0x00,0x00]
57
5458 s_load_dwordx4 s[100:103], s[2:3], s4
5559 // GCN: s_load_dwordx4 s[100:103], s[2:3], s4 ; encoding: [0x04,0x02,0xb2,0xc0]
5660 // NOVI: error: invalid operand for instruction
8791 // GCN: s_buffer_load_dword s1, s[4:7], s4 ; encoding: [0x04,0x84,0x00,0xc2]
8892 // VI: s_buffer_load_dword s1, s[4:7], s4 ; encoding: [0x42,0x00,0x20,0xc0,0x04,0x00,0x00,0x00]
8993
94 s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4
95 // GCN: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x74,0xf4,0x38,0xc2]
96 // VI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x7a,0x1c,0x20,0xc0,0x74,0x00,0x00,0x00]
97
9098 s_buffer_load_dwordx2 s[8:9], s[4:7], 1
9199 // GCN: s_buffer_load_dwordx2 s[8:9], s[4:7], 0x1 ; encoding: [0x01,0x05,0x44,0xc2]
92100 // VI: s_buffer_load_dwordx2 s[8:9], s[4:7], 0x1 ; encoding: [0x02,0x02,0x26,0xc0,0x01,0x00,0x00,0x00]
102110 s_buffer_load_dwordx4 s[8:11], s[4:7], s4
103111 // GCN: s_buffer_load_dwordx4 s[8:11], s[4:7], s4 ; encoding: [0x04,0x04,0x84,0xc2]
104112 // VI: s_buffer_load_dwordx4 s[8:11], s[4:7], s4 ; encoding: [0x02,0x02,0x28,0xc0,0x04,0x00,0x00,0x00]
113
114 s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4
115 // GCN: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2]
116 // VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00]
105117
106118 s_buffer_load_dwordx4 s[100:103], s[4:7], s4
107119 // GCN: s_buffer_load_dwordx4 s[100:103], s[4:7], s4 ; encoding: [0x04,0x04,0xb2,0xc2]