llvm.org GIT mirror llvm / 3bd8feb
AMDGPU: Turn D16 for MIMG instructions into a regular operand Summary: This allows us to reduce the number of different machine instruction opcodes, which reduces the table sizes and helps flatten the TableGen multiclass hierarchies. We can do this because for each hardware MIMG opcode, we have a full set of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata and vaddr registers. Instead of having separate D16 machine instructions, a packed D16 instruction loading e.g. 4 components can simply use the same V2 opcode variant that non-D16 instructions use. We still require a TSFlag for D16 buffer instructions, because the D16-ness of buffer instructions is part of the opcode. Renaming the flag should help avoid future confusion. The one non-obvious code change is that for gather4 instructions, the disassembler can no longer automatically decide whether to use a V2 or a V4 variant. The existing logic that chooses the correct variant for other MIMG instructions is extended to cover gather4 as well. As a bonus, some of the assembler error messages are now more helpful (e.g., complaining about a wrong data size instead of a non-existent instruction). While we're at it, delete a whole bunch of dead legacy TableGen code. Change-Id: I89b02c2841c06f95e662541433e597f5d4553978 Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D47434 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@335222 91177308-0d34-0410-b5e6-96231b3b80d8 Nicolai Haehnle 1 year, 8 months ago
16 changed file(s) with 331 addition(s) and 431 deletion(s). Raw diff Collapse all Expand all
108108 // Adjust the encoding family to GFX80 for D16 buffer instructions when the
109109 // subtarget has UnpackedD16VMem feature.
110110 // TODO: remove this when we discard GFX80 encoding.
111 if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
112 && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
111 if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
113112 Gen = SIEncodingFamily::GFX80;
114113
115114 int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
23002300 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
23012301 return true;
23022302
2303 // Gather4 instructions do not need validation: dst size is hardcoded.
2304 if (Desc.TSFlags & SIInstrFlags::Gather4)
2305 return true;
2306
23072303 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
23082304 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
23092305 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
23182314 if (DMask == 0)
23192315 DMask = 1;
23202316
2321 unsigned DataSize = countPopulation(DMask);
2322 if ((Desc.TSFlags & SIInstrFlags::D16) != 0 && hasPackedD16()) {
2323 DataSize = (DataSize + 1) / 2;
2317 unsigned DataSize =
2318 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2319 if (hasPackedD16()) {
2320 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2321 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2322 DataSize = (DataSize + 1) / 2;
23242323 }
23252324
23262325 return (VDataSize / 4) == DataSize + TFESize;
23882387
23892388 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
23902389 return true;
2391 if ((Desc.TSFlags & SIInstrFlags::D16) == 0)
2392 return true;
2393
2394 return !isCI() && !isSI();
2390
2391 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2392 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2393 if (isCI() || isSI())
2394 return false;
2395 }
2396
2397 return true;
23952398 }
23962399
23972400 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
42604263 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
42614264 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
42624265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4266 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
42634267 }
42644268
42654269 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
42844288
42854289 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
42864290 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
4291 }
4292
4293 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultD16() const {
4294 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyD16);
42874295 }
42884296
42894297 //===----------------------------------------------------------------------===//
43884396 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
43894397 {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
43904398 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
4399 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
43914400 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
43924401 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
43934402 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
50935102 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
50945103 case MCK_glc:
50955104 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5096 case MCK_d16:
5097 return Operand.isD16() ? Match_Success : Match_InvalidOperand;
50985105 case MCK_idxen:
50995106 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
51005107 case MCK_offen:
719719 "buffer_store_format_xyzw", VReg_128
720720 >;
721721
722 let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
722 let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
723723 defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads <
724724 "buffer_load_format_d16_x", VGPR_32
725725 >;
746746 >;
747747 } // End HasUnpackedD16VMem.
748748
749 let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
749 let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
750750 defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads <
751751 "buffer_load_format_d16_x", VGPR_32
752752 >;
989989 defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
990990 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
991991
992 let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
992 let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
993993 defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
994994 defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>;
995995 defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>;
10001000 defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>;
10011001 } // End HasUnpackedD16VMem.
10021002
1003 let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
1003 let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
10041004 defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
10051005 defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>;
10061006 defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>;
288288 // as if it has 1 dword, which could be not really so.
289289 DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
290290
291 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4) {
292 return MCDisassembler::Success;
293 }
294
295291 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
296292 AMDGPU::OpName::vdst);
297293
303299
304300 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
305301 AMDGPU::OpName::tfe);
302 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
303 AMDGPU::OpName::d16);
306304
307305 assert(VDataIdx != -1);
308306 assert(DMaskIdx != -1);
309307 assert(TFEIdx != -1);
310308
311309 bool IsAtomic = (VDstIdx != -1);
310 bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
312311
313312 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
314313 if (DMask == 0)
315314 return MCDisassembler::Success;
316315
317 unsigned DstSize = countPopulation(DMask);
316 unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
318317 if (DstSize == 1)
319318 return MCDisassembler::Success;
320319
321 bool D16 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::D16;
320 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
322321 if (D16 && AMDGPU::hasPackedD16(STI)) {
323322 DstSize = (DstSize + 1) / 2;
324323 }
334333 NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize);
335334 }
336335 if (NewOpcode == -1) return MCDisassembler::Success;
336 } else if (IsGather4) {
337 if (D16 && AMDGPU::hasPackedD16(STI))
338 NewOpcode = AMDGPU::getMIMGGatherOpPackedD16(MI.getOpcode());
339 else
340 return MCDisassembler::Success;
337341 } else {
338342 NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize);
339343 assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
214214 void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
215215 const MCSubtargetInfo &STI, raw_ostream &O) {
216216 printNamedBit(MI, OpNo, O, "lwe");
217 }
218
219 void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
220 const MCSubtargetInfo &STI, raw_ostream &O) {
221 printNamedBit(MI, OpNo, O, "d16");
217222 }
218223
219224 void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
8282 void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
8383 raw_ostream &O);
8484 void printLWE(const MCInst *MI, unsigned OpNo,
85 const MCSubtargetInfo &STI, raw_ostream &O);
86 void printD16(const MCInst *MI, unsigned OpNo,
8587 const MCSubtargetInfo &STI, raw_ostream &O);
8688 void printExpCompr(const MCInst *MI, unsigned OpNo,
8789 const MCSubtargetInfo &STI, raw_ostream &O);
1414 class MIMG_Atomic_Size {
1515 string Op = op;
1616 int AtomicSize = !if(is32Bit, 1, 2);
17 }
18
19 class MIMG_Gather_Size {
20 string Op = op;
21 int Channels = channels;
1722 }
1823
1924 class mimg si, bits<7> vi = si> {
3641 class MIMG_NoSampler_Helper op, string asm,
3742 RegisterClass dst_rc,
3843 RegisterClass addr_rc,
39 bit d16_bit=0,
40 string dns=""> : MIMG_Helper <
41 (outs dst_rc:$vdata),
42 (ins addr_rc:$vaddr, SReg_256:$srsrc,
43 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
44 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
45 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
46 dns>, MIMGe {
44 bit has_d16,
45 string dns="">
46 : MIMG_Helper <(outs dst_rc:$vdata),
47 !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
48 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
49 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
50 !if(has_d16, (ins D16:$d16), (ins))),
51 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
52 #!if(has_d16, "$d16", ""),
53 dns>,
54 MIMGe {
4755 let ssamp = 0;
48 let D16 = d16;
49 }
50
51 multiclass MIMG_NoSampler_Src_Helper_Helper op, string asm,
56
57 let HasD16 = has_d16;
58 let d16 = !if(HasD16, ?, 0);
59 }
60
61 multiclass MIMG_NoSampler_Src_Helper op, string asm,
5262 RegisterClass dst_rc,
53 int channels, bit d16_bit,
54 string suffix> {
55 def NAME # _V1 # suffix : MIMG_NoSampler_Helper
56 !if(!eq(channels, 1), "AMDGPU", "")>,
57 MIMG_Mask;
58 def NAME # _V2 # suffix : MIMG_NoSampler_Helper ,
59 MIMG_Mask;
60 def NAME # _V3 # suffix : MIMG_NoSampler_Helper ,
61 MIMG_Mask;
62 def NAME # _V4 # suffix : MIMG_NoSampler_Helper ,
63 MIMG_Mask;
64 }
65
66 multiclass MIMG_NoSampler_Src_Helper op, string asm,
67 RegisterClass dst_rc,
68 int channels> {
69 defm NAME : MIMG_NoSampler_Src_Helper_Helper ;
70
71 let d16 = 1 in {
72 let SubtargetPredicate = HasPackedD16VMem in {
73 defm NAME : MIMG_NoSampler_Src_Helper_Helper ;
74 } // End HasPackedD16VMem.
75
76 let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
77 defm NAME : MIMG_NoSampler_Src_Helper_Helper ;
78 } // End HasUnpackedD16VMem.
79 } // End d16 = 1.
80 }
81
82 multiclass MIMG_NoSampler op, string asm> {
83 defm _V1 : MIMG_NoSampler_Src_Helper ;
84 defm _V2 : MIMG_NoSampler_Src_Helper ;
85 defm _V3 : MIMG_NoSampler_Src_Helper ;
86 defm _V4 : MIMG_NoSampler_Src_Helper ;
87 }
88
89 multiclass MIMG_PckNoSampler op, string asm> {
90 defm NAME # _V1 : MIMG_NoSampler_Src_Helper_Helper ;
91 defm NAME # _V2 : MIMG_NoSampler_Src_Helper_Helper ;
92 defm NAME # _V3 : MIMG_NoSampler_Src_Helper_Helper ;
93 defm NAME # _V4 : MIMG_NoSampler_Src_Helper_Helper ;
63 int channels, bit has_d16> {
64 def NAME # _V1 : MIMG_NoSampler_Helper
65 !if(!eq(channels, 1), "AMDGPU", "")>,
66 MIMG_Mask;
67 def NAME # _V2 : MIMG_NoSampler_Helper ,
68 MIMG_Mask;
69 def NAME # _V3 : MIMG_NoSampler_Helper ,
70 MIMG_Mask;
71 def NAME # _V4 : MIMG_NoSampler_Helper ,
72 MIMG_Mask;
73 }
74
75 multiclass MIMG_NoSampler op, string asm, bit has_d16> {
76 defm _V1 : MIMG_NoSampler_Src_Helper ;
77 defm _V2 : MIMG_NoSampler_Src_Helper ;
78 defm _V3 : MIMG_NoSampler_Src_Helper ;
79 defm _V4 : MIMG_NoSampler_Src_Helper ;
9480 }
9581
9682 class MIMG_Store_Helper op, string asm,
9783 RegisterClass data_rc,
9884 RegisterClass addr_rc,
99 bit d16_bit=0,
100 string dns = ""> : MIMG_Helper <
101 (outs),
102 (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
103 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
104 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
105 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe {
85 bit has_d16,
86 string dns = "">
87 : MIMG_Helper <(outs),
88 !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
89 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
90 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
91 !if(has_d16, (ins D16:$d16), (ins))),
92 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
93 #!if(has_d16, "$d16", ""),
94 dns>,
95 MIMGe {
10696 let ssamp = 0;
10797 let mayLoad = 0;
10898 let mayStore = 1;
10999 let hasSideEffects = 0;
110100 let hasPostISelHook = 0;
111101 let DisableWQM = 1;
112 let D16 = d16;
113 }
114
115 multiclass MIMG_Store_Addr_Helper_Helper op, string asm,
116 RegisterClass data_rc,
117 int channels, bit d16_bit,
118 string suffix> {
119 def NAME # _V1 # suffix : MIMG_Store_Helper
120 !if(!eq(channels, 1), "AMDGPU", "")>,
121 MIMG_Mask;
122 def NAME # _V2 # suffix : MIMG_Store_Helper ,
123 MIMG_Mask;
124 def NAME # _V3 # suffix : MIMG_Store_Helper ,
125 MIMG_Mask;
126 def NAME # _V4 # suffix : MIMG_Store_Helper ,
127 MIMG_Mask;
102
103 let HasD16 = has_d16;
104 let d16 = !if(HasD16, ?, 0);
128105 }
129106
130107 multiclass MIMG_Store_Addr_Helper op, string asm,
131108 RegisterClass data_rc,
132 int channels> {
133 defm NAME : MIMG_Store_Addr_Helper_Helper ;
134
135 let d16 = 1 in {
136 let SubtargetPredicate = HasPackedD16VMem in {
137 defm NAME : MIMG_Store_Addr_Helper_Helper ;
138 } // End HasPackedD16VMem.
139
140 let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
141 defm NAME : MIMG_Store_Addr_Helper_Helper ;
142 } // End HasUnpackedD16VMem.
143 } // End d16 = 1.
144 }
145
146 multiclass MIMG_Store op, string asm> {
147 defm _V1 : MIMG_Store_Addr_Helper ;
148 defm _V2 : MIMG_Store_Addr_Helper ;
149 defm _V3 : MIMG_Store_Addr_Helper ;
150 defm _V4 : MIMG_Store_Addr_Helper ;
151 }
152
153 multiclass MIMG_PckStore op, string asm> {
154 defm NAME # _V1 : MIMG_Store_Addr_Helper_Helper ;
155 defm NAME # _V2 : MIMG_Store_Addr_Helper_Helper ;
156 defm NAME # _V3 : MIMG_Store_Addr_Helper_Helper ;
157 defm NAME # _V4 : MIMG_Store_Addr_Helper_Helper ;
109 int channels, bit has_d16> {
110 def NAME # _V1 : MIMG_Store_Helper
111 !if(!eq(channels, 1), "AMDGPU", "")>,
112 MIMG_Mask;
113 def NAME # _V2 : MIMG_Store_Helper ,
114 MIMG_Mask;
115 def NAME # _V3 : MIMG_Store_Helper ,
116 MIMG_Mask;
117 def NAME # _V4 : MIMG_Store_Helper ,
118 MIMG_Mask;
119 }
120
121 multiclass MIMG_Store op, string asm, bit has_d16> {
122 defm _V1 : MIMG_Store_Addr_Helper ;
123 defm _V2 : MIMG_Store_Addr_Helper ;
124 defm _V3 : MIMG_Store_Addr_Helper ;
125 defm _V4 : MIMG_Store_Addr_Helper ;
158126 }
159127
160128 class MIMG_Atomic_Helper
176144 }
177145
178146 class MIMG_Atomic_Real_si
179 RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm> :
180 MIMG_Atomic_Helper,
181 SIMCInstr,
182 MIMGe {
147 RegisterClass data_rc, RegisterClass addr_rc,
148 bit enableDasm>
149 : MIMG_Atomic_Helper,
150 SIMCInstr,
151 MIMGe {
183152 let isCodeGenOnly = 0;
184153 let AssemblerPredicates = [isSICI];
185154 let DisableDecoder = DisableSIDecoder;
155 let d16 = 0;
186156 }
187157
188158 class MIMG_Atomic_Real_vi
189 RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm> :
190 MIMG_Atomic_Helper,
191 SIMCInstr,
192 MIMGe {
159 RegisterClass data_rc, RegisterClass addr_rc,
160 bit enableDasm>
161 : MIMG_Atomic_Helper,
162 SIMCInstr,
163 MIMGe {
193164 let isCodeGenOnly = 0;
194165 let AssemblerPredicates = [isVI];
195166 let DisableDecoder = DisableVIDecoder;
167 let d16 = 0;
196168 }
197169
198170 multiclass MIMG_Atomic_Helper_m
244216 class MIMG_Sampler_Helper op, string asm,
245217 RegisterClass dst_rc,
246218 RegisterClass src_rc,
247 bit wqm,
248 bit d16_bit=0,
249 string dns=""> : MIMG_Helper <
250 (outs dst_rc:$vdata),
251 (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
252 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
253 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
254 asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
255 dns>, MIMGe {
219 bit wqm, bit has_d16,
220 string dns="">
221 : MIMG_Helper <(outs dst_rc:$vdata),
222 !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
223 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
224 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
225 !if(has_d16, (ins D16:$d16), (ins))),
226 asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
227 #!if(has_d16, "$d16", ""),
228 dns>,
229 MIMGe {
256230 let WQM = wqm;
257 let D16 = d16;
258 }
259
260 multiclass MIMG_Sampler_Src_Helper_Helper op, string asm,
261 RegisterClass dst_rc,
262 int channels, bit wqm,
263 bit d16_bit, string suffix> {
264 def _V1 # suffix : MIMG_Sampler_Helper
265 !if(!eq(channels, 1), "AMDGPU", "")>,
266 MIMG_Mask;
267 def _V2 # suffix : MIMG_Sampler_Helper ,
268 MIMG_Mask;
269 def _V3 # suffix : MIMG_Sampler_Helper ,
270 MIMG_Mask;
271 def _V4 # suffix : MIMG_Sampler_Helper ,
272 MIMG_Mask;
273 def _V8 # suffix : MIMG_Sampler_Helper ,
274 MIMG_Mask;
275 def _V16 # suffix : MIMG_Sampler_Helper ,
276 MIMG_Mask;
231
232 let HasD16 = has_d16;
233 let d16 = !if(HasD16, ?, 0);
277234 }
278235
279236 multiclass MIMG_Sampler_Src_Helper op, string asm,
280237 RegisterClass dst_rc,
281 int channels, bit wqm> {
282 defm "" : MIMG_Sampler_Src_Helper_Helper ;
283
284 let d16 = 1 in {
285 let SubtargetPredicate = HasPackedD16VMem in {
286 defm "" : MIMG_Sampler_Src_Helper_Helper ;
287 } // End HasPackedD16VMem.
288
289 let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
290 defm "" : MIMG_Sampler_Src_Helper_Helper ;
291 } // End HasUnpackedD16VMem.
292 } // End d16 = 1.
293 }
294
295 multiclass MIMG_Sampler op, string asm, bit wqm=0> {
296 defm _V1 : MIMG_Sampler_Src_Helper;
297 defm _V2 : MIMG_Sampler_Src_Helper;
298 defm _V3 : MIMG_Sampler_Src_Helper;
299 defm _V4 : MIMG_Sampler_Src_Helper;
238 int channels, bit wqm, bit has_d16> {
239 def _V1 : MIMG_Sampler_Helper
240 !if(!eq(channels, 1), "AMDGPU", "")>,
241 MIMG_Mask;
242 def _V2 : MIMG_Sampler_Helper ,
243 MIMG_Mask;
244 def _V3 : MIMG_Sampler_Helper ,
245 MIMG_Mask;
246 def _V4 : MIMG_Sampler_Helper ,
247 MIMG_Mask;
248 def _V8 : MIMG_Sampler_Helper ,
249 MIMG_Mask;
250 def _V16 : MIMG_Sampler_Helper ,
251 MIMG_Mask;
252 }
253
254 multiclass MIMG_Sampler op, string asm, bit wqm = 0, bit has_d16 = 1> {
255 defm _V1 : MIMG_Sampler_Src_Helper;
256 defm _V2 : MIMG_Sampler_Src_Helper;
257 defm _V3 : MIMG_Sampler_Src_Helper;
258 defm _V4 : MIMG_Sampler_Src_Helper;
300259 }
301260
302261 multiclass MIMG_Sampler_WQM op, string asm> : MIMG_Sampler;
305264 RegisterClass dst_rc,
306265 RegisterClass src_rc,
307266 bit wqm,
308 bit d16_bit=0,
309 string dns=""> : MIMG <
310 (outs dst_rc:$vdata),
311 (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
312 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
313 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
314 asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
315 []>, MIMGe {
267 string dns="">
268 : MIMG <(outs dst_rc:$vdata),
269 (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
270 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
271 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da, D16:$d16),
272 asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da$d16",
273 []>,
274 MIMGe {
316275 let mayLoad = 1;
317276 let mayStore = 0;
318277
326285 let Gather4 = 1;
327286 let hasPostISelHook = 0;
328287 let WQM = wqm;
329 let D16 = d16;
288 let HasD16 = 1;
330289
331290 let DecoderNamespace = dns;
332291 let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
335294
336295 multiclass MIMG_Gather_Src_Helper op, string asm,
337296 RegisterClass dst_rc,
338 bit wqm, bit d16_bit,
339 string prefix,
340 string suffix> {
341 def prefix # _V1 # suffix : MIMG_Gather_Helper ;
342 def prefix # _V2 # suffix : MIMG_Gather_Helper ;
343 def prefix # _V3 # suffix : MIMG_Gather_Helper ;
344 def prefix # _V4 # suffix : MIMG_Gather_Helper ;
345 def prefix # _V8 # suffix : MIMG_Gather_Helper ;
346 def prefix # _V16 # suffix : MIMG_Gather_Helper ;
297 int channels, bit wqm> {
298 def _V1 : MIMG_Gather_Helper
299 !if(!eq(channels, 4), "AMDGPU", "")>,
300 MIMG_Gather_Size;
301 def _V2 : MIMG_Gather_Helper ,
302 MIMG_Gather_Size;
303 def _V3 : MIMG_Gather_Helper ,
304 MIMG_Gather_Size;
305 def _V4 : MIMG_Gather_Helper ,
306 MIMG_Gather_Size;
307 def _V8 : MIMG_Gather_Helper ,
308 MIMG_Gather_Size;
309 def _V16 : MIMG_Gather_Helper ,
310 MIMG_Gather_Size;
347311 }
348312
349313 multiclass MIMG_Gather op, string asm, bit wqm=0> {
350 defm "" : MIMG_Gather_Src_Helper;
351
352 let d16 = 1 in {
353 let AssemblerPredicate = HasPackedD16VMem in {
354 defm "" : MIMG_Gather_Src_Helper;
355 } // End HasPackedD16VMem.
356
357 let AssemblerPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
358 defm "" : MIMG_Gather_Src_Helper;
359 } // End HasUnpackedD16VMem.
360 } // End d16 = 1.
314 defm _V2 : MIMG_Gather_Src_Helper; /* for packed D16 only */
315 defm _V4 : MIMG_Gather_Src_Helper;
361316 }
362317
363318 multiclass MIMG_Gather_WQM op, string asm> : MIMG_Gather;
366321 // MIMG Instructions
367322 //===----------------------------------------------------------------------===//
368323 let SubtargetPredicate = isGCN in {
369 defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
370 defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
371 defm IMAGE_LOAD_PCK : MIMG_PckNoSampler <0x00000002, "image_load_pck">;
372 defm IMAGE_LOAD_PCK_SGN : MIMG_PckNoSampler <0x00000003, "image_load_pck_sgn">;
373 defm IMAGE_LOAD_MIP_PCK : MIMG_PckNoSampler <0x00000004, "image_load_mip_pck">;
374 defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_PckNoSampler <0x00000005, "image_load_mip_pck_sgn">;
375 defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store">;
376 defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip">;
377 defm IMAGE_STORE_PCK : MIMG_PckStore <0x0000000a, "image_store_pck">;
378 defm IMAGE_STORE_MIP_PCK : MIMG_PckStore <0x0000000b, "image_store_mip_pck">;
324 defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load", 1>;
325 defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip", 1>;
326 defm IMAGE_LOAD_PCK : MIMG_NoSampler <0x00000002, "image_load_pck", 0>;
327 defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler <0x00000003, "image_load_pck_sgn", 0>;
328 defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler <0x00000004, "image_load_mip_pck", 0>;
329 defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler <0x00000005, "image_load_mip_pck_sgn", 0>;
330 defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store", 1>;
331 defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip", 1>;
332 defm IMAGE_STORE_PCK : MIMG_Store <0x0000000a, "image_store_pck", 0>;
333 defm IMAGE_STORE_MIP_PCK : MIMG_Store <0x0000000b, "image_store_mip_pck", 0>;
379334
380335 let mayLoad = 0, mayStore = 0 in {
381 defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
336 defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo", 0>;
382337 }
383338
384339 defm IMAGE_ATOMIC_SWAP : MIMG_Atomic , "image_atomic_swap">;
456411 defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
457412
458413 let mayLoad = 0, mayStore = 0 in {
459 defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
414 defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod", 1, 0>;
460415 }
461416
462417 defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
518473 }
519474
520475 class ImageDimPattern
521 string dop, ValueType dty,
476 string dop, ValueType dty, bit d16,
522477 string suffix = ""> : GCNPat<(undef), (undef)> {
523478 list AddrArgs = I.P.AddrDefaultArgs;
524479 getDwordsType AddrDwords = getDwordsType;
525480
526 Instruction MI =
527 !cast(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
481 MIMG MI =
482 !cast(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
528483
529484 // DAG fragment to match data arguments (vdata for store/atomic, dmask
530485 // for non-atomic).
580535 0, /* r128 */
581536 0, /* tfe */
582537 0 /*(as_i1imm $lwe)*/,
583 { I.P.Dim.DA }));
538 { I.P.Dim.DA }),
539 !if(MI.HasD16, (MI d16), (MI)));
584540 let ResultInstrs = [
585541 !if(IsCmpSwap, (EXTRACT_SUBREG ImageInstruction, sub0), ImageInstruction)
586542 ];
588544
589545 foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
590546 AMDGPUImageDimGetResInfoIntrinsics) in {
591 def intr#_pat_v1 : ImageDimPattern;
592 def intr#_pat_v2 : ImageDimPattern;
593 def intr#_pat_v4 : ImageDimPattern>;
547 def intr#_pat_v1 : ImageDimPattern>;
548 def intr#_pat_v2 : ImageDimPattern;
549 def intr#_pat_v4 : ImageDimPattern;
594550 }
595551
596552 multiclass ImageDimD16Helper
597553 AMDGPUImageDimIntrinsic d16helper> {
598554 let SubtargetPredicate = HasUnpackedD16VMem in {
599 def _unpacked_v1 : ImageDimPattern;
600 def _unpacked_v2 : ImageDimPattern;
601 def _unpacked_v4 : ImageDimPattern>;
555 def _unpacked_v1 : ImageDimPattern>;
556 def _unpacked_v2 : ImageDimPattern;
557 def _unpacked_v4 : ImageDimPattern;
602558 } // End HasUnpackedD16VMem.
603559
604560 let SubtargetPredicate = HasPackedD16VMem in {
605 def _packed_v1 : ImageDimPattern;
606 def _packed_v2 : ImageDimPattern;
607 def _packed_v4 : ImageDimPattern>;
561 def _packed_v1 : ImageDimPattern>;
562 def _packed_v2 : ImageDimPattern;
563 def _packed_v4 : ImageDimPattern;
608564 } // End HasPackedD16VMem.
609565 }
610566
626582 }
627583
628584 foreach intr = AMDGPUImageDimGatherIntrinsics in {
629 def intr#_pat3 : ImageDimPattern>;
585 def intr#_pat3 : ImageDimPattern, 0>;
630586
631587 def intr#_d16helper_profile : AMDGPUDimProfileCopy {
632588 let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
642598 def intr#_unpacked_v4 :
643599 ImageDimPattern(
644600 "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
645 "_V4", v4i32, "_D16_gfx80">;
601 "_V4", v4i32, 1>;
646602 } // End HasUnpackedD16VMem.
647603
648604 let SubtargetPredicate = HasPackedD16VMem in {
649 def intr#_packed_v4 : ImageDimPattern"_D16">;
605 def intr#_packed_v4 : ImageDimPattern1>;
650606 } // End HasPackedD16VMem.
651607 }
652608
653609 foreach intr = AMDGPUImageDimAtomicIntrinsics in {
654 def intr#_pat1 : ImageDimPattern>;
610 def intr#_pat1 : ImageDimPattern, 0>;
655611 }
656612
657613 /********** ======================= **********/
662618 // TODO:
663619 // 1. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128).
664620 // 2. Add A16 support when we pass address of half type.
665 multiclass ImageSamplePattern ValueType dt, ValueType vt> {
621 multiclass ImageSamplePattern
622 ValueType dt, ValueType vt, bit d16> {
666623 def : GCNPat<
667624 (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc,
668625 i1:$slc, i1:$lwe, i1:$da)),
669 (opcode $addr, $rsrc, $sampler,
670 (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
671 0, 0, (as_i1imm $lwe), (as_i1imm $da))
626 !con((opcode $addr, $rsrc, $sampler, (as_i32imm $dmask), (as_i1imm $unorm),
627 (as_i1imm $glc), (as_i1imm $slc), 0, 0, (as_i1imm $lwe),
628 (as_i1imm $da)),
629 !if(opcode.HasD16, (opcode d16), (opcode)))
672630 >;
673631 }
674632
675 multiclass ImageSampleDataPatterns {
676 defm : ImageSamplePattern(opcode # _V1 # suffix), dt, f32>;
677 defm : ImageSamplePattern(opcode # _V2 # suffix), dt, v2f32>;
678 defm : ImageSamplePattern(opcode # _V4 # suffix), dt, v4f32>;
679 defm : ImageSamplePattern(opcode # _V8 # suffix), dt, v8f32>;
680 defm : ImageSamplePattern(opcode # _V16 # suffix), dt, v16f32>;
633 multiclass ImageSampleDataPatterns
634 ValueType dt, bit d16> {
635 defm : ImageSamplePattern(opcode # _V1), dt, f32, d16>;
636 defm : ImageSamplePattern(opcode # _V2), dt, v2f32, d16>;
637 defm : ImageSamplePattern(opcode # _V4), dt, v4f32, d16>;
638 defm : ImageSamplePattern(opcode # _V8), dt, v8f32, d16>;
639 defm : ImageSamplePattern(opcode # _V16), dt, v16f32, d16>;
681640 }
682641
683642 // ImageSample patterns.
684643 multiclass ImageSamplePatterns {
685 defm : ImageSampleDataPatterns(opcode # _V1), f32>;
686 defm : ImageSampleDataPatterns(opcode # _V2), v2f32>;
687 defm : ImageSampleDataPatterns(opcode # _V4), v4f32>;
644 defm : ImageSampleDataPatterns(opcode # _V1), f32, 0>;
645 defm : ImageSampleDataPatterns(opcode # _V2), v2f32, 0>;
646 defm : ImageSampleDataPatterns(opcode # _V4), v4f32, 0>;
688647
689648 let SubtargetPredicate = HasUnpackedD16VMem in {
690 defm : ImageSampleDataPatterns(opcode # _V1), f16, "_D16_gfx80">;
649 defm : ImageSampleDataPatterns(opcode # _V1), f16, 1>;
691650 } // End HasUnpackedD16VMem.
692651
693652 let SubtargetPredicate = HasPackedD16VMem in {
694 defm : ImageSampleDataPatterns(opcode # _V1), f16, "_D16">;
695 defm : ImageSampleDataPatterns(opcode # _V1), v2f16, "_D16">;
696 defm : ImageSampleDataPatterns(opcode # _V2), v4f16, "_D16">;
653 defm : ImageSampleDataPatterns(opcode # _V1), f16, 1>;
654 defm : ImageSampleDataPatterns(opcode # _V1), v2f16, 1>;
655 defm : ImageSampleDataPatterns(opcode # _V2), v4f16, 1>;
697656 } // End HasPackedD16VMem.
698657 }
699658
700659 // ImageSample alternative patterns for illegal vector half Types.
701660 multiclass ImageSampleAltPatterns {
702661 let SubtargetPredicate = HasUnpackedD16VMem in {
703 defm : ImageSampleDataPatterns(opcode # _V2), v2i32, "_D16_gfx80">;
704 defm : ImageSampleDataPatterns(opcode # _V4), v4i32, "_D16_gfx80">;
662 defm : ImageSampleDataPatterns(opcode # _V2), v2i32, 1>;
663 defm : ImageSampleDataPatterns(opcode # _V4), v4i32, 1>;
705664 } // End HasUnpackedD16VMem.
706665 }
707666
708667 // ImageGather4 patterns.
709668 multiclass ImageGather4Patterns {
710 defm : ImageSampleDataPatterns(opcode # _V4), v4f32>;
669 defm : ImageSampleDataPatterns(opcode # _V4), v4f32, 0>;
711670
712671 let SubtargetPredicate = HasPackedD16VMem in {
713 defm : ImageSampleDataPatterns(opcode # _V2), v4f16, "_D16">;
672 defm : ImageSampleDataPatterns(opcode # _V2), v4f16, 1>;
714673 } // End HasPackedD16VMem.
715674 }
716675
717676 // ImageGather4 alternative patterns for illegal vector half Types.
718677 multiclass ImageGather4AltPatterns {
719678 let SubtargetPredicate = HasUnpackedD16VMem in {
720 defm : ImageSampleDataPatterns(opcode # _V4), v4i32, "_D16_gfx80">;
679 defm : ImageSampleDataPatterns(opcode # _V4), v4i32, 1>;
721680 } // End HasUnpackedD16VMem.
722
723681 }
724682
725683 // ImageLoad for amdgcn.
726 multiclass ImageLoadPattern ValueType dt, ValueType vt> {
684 multiclass ImageLoadPattern
685 ValueType dt, ValueType vt, bit d16> {
727686 def : GCNPat <
728687 (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe,
729688 i1:$da)),
730 (opcode $addr, $rsrc,
731 (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
732 0, 0, (as_i1imm $lwe), (as_i1imm $da))
689 !con((opcode $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
690 (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
691 !if(opcode.HasD16, (opcode d16), (opcode)))
733692 >;
734693 }
735694
736 multiclass ImageLoadDataPatterns {
737 defm : ImageLoadPattern(opcode # _V1 # suffix), dt, i32>;
738 defm : ImageLoadPattern(opcode # _V2 # suffix), dt, v2i32>;
739 defm : ImageLoadPattern(opcode # _V4 # suffix), dt, v4i32>;
695 multiclass ImageLoadDataPatterns
696 ValueType dt, bit d16> {
697 defm : ImageLoadPattern(opcode # _V1), dt, i32, d16>;
698 defm : ImageLoadPattern(opcode # _V2), dt, v2i32, d16>;
699 defm : ImageLoadPattern(opcode # _V4), dt, v4i32, d16>;
740700 }
741701
742702 // ImageLoad patterns.
743703 // TODO: support v3f32.
744704 multiclass ImageLoadPatterns {
745 defm : ImageLoadDataPatterns(opcode # _V1), f32>;
746 defm : ImageLoadDataPatterns(opcode # _V2), v2f32>;
747 defm : ImageLoadDataPatterns(opcode # _V4), v4f32>;
705 defm : ImageLoadDataPatterns(opcode # _V1), f32, 0>;
706 defm : ImageLoadDataPatterns(opcode # _V2), v2f32, 0>;
707 defm : ImageLoadDataPatterns(opcode # _V4), v4f32, 0>;
748708
749709 let SubtargetPredicate = HasUnpackedD16VMem in {
750 defm : ImageLoadDataPatterns(opcode # _V1), f16, "_D16_gfx80">;
710 defm : ImageLoadDataPatterns(opcode # _V1), f16, 1>;
751711 } // End HasUnpackedD16VMem.
752712
753713 let SubtargetPredicate = HasPackedD16VMem in {
754 defm : ImageLoadDataPatterns(opcode # _V1), f16, "_D16">;
755 defm : ImageLoadDataPatterns(opcode # _V1), v2f16, "_D16">;
756 defm : ImageLoadDataPatterns(opcode # _V2), v4f16, "_D16">;
714 defm : ImageLoadDataPatterns(opcode # _V1), f16, 1>;
715 defm : ImageLoadDataPatterns(opcode # _V1), v2f16, 1>;
716 defm : ImageLoadDataPatterns(opcode # _V2), v4f16, 1>;
757717 } // End HasPackedD16VMem.
758718 }
759719
760720 // ImageLoad alternative patterns for illegal vector half Types.
761721 multiclass ImageLoadAltPatterns {
762722 let SubtargetPredicate = HasUnpackedD16VMem in {
763 defm : ImageLoadDataPatterns(opcode # _V2), v2i32, "_D16_gfx80">;
764 defm : ImageLoadDataPatterns(opcode # _V4), v4i32, "_D16_gfx80">;
723 defm : ImageLoadDataPatterns(opcode # _V2), v2i32, 1>;
724 defm : ImageLoadDataPatterns(opcode # _V4), v4i32, 1>;
765725 } // End HasUnpackedD16VMem.
766726 }
767727
768728 // ImageStore for amdgcn.
769 multiclass ImageStorePattern ValueType dt, ValueType vt> {
729 multiclass ImageStorePattern
730 ValueType dt, ValueType vt, bit d16> {
770731 def : GCNPat <
771732 (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc,
772733 i1:$lwe, i1:$da),
773 (opcode $data, $addr, $rsrc,
774 (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
775 0, 0, (as_i1imm $lwe), (as_i1imm $da))
734 !con((opcode $data, $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
735 (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
736 !if(opcode.HasD16, (opcode d16), (opcode)))
776737 >;
777738 }
778739
779 multiclass ImageStoreDataPatterns {
780 defm : ImageStorePattern(opcode # _V1 # suffix), dt, i32>;
781 defm : ImageStorePattern(opcode # _V2 # suffix), dt, v2i32>;
782 defm : ImageStorePattern(opcode # _V4 # suffix), dt, v4i32>;
740 multiclass ImageStoreDataPatterns
741 ValueType dt, bit d16> {
742 defm : ImageStorePattern(opcode # _V1), dt, i32, d16>;
743 defm : ImageStorePattern(opcode # _V2), dt, v2i32, d16>;
744 defm : ImageStorePattern(opcode # _V4), dt, v4i32, d16>;
783745 }
784746
785747 // ImageStore patterns.
786748 // TODO: support v3f32.
787749 multiclass ImageStorePatterns {
788 defm : ImageStoreDataPatterns(opcode # _V1), f32>;
789 defm : ImageStoreDataPatterns(opcode # _V2), v2f32>;
790 defm : ImageStoreDataPatterns(opcode # _V4), v4f32>;
750 defm : ImageStoreDataPatterns(opcode # _V1), f32, 0>;
751 defm : ImageStoreDataPatterns(opcode # _V2), v2f32, 0>;
752 defm : ImageStoreDataPatterns(opcode # _V4), v4f32, 0>;
791753
792754 let SubtargetPredicate = HasUnpackedD16VMem in {
793 defm : ImageStoreDataPatterns(opcode # _V1), f16, "_D16_gfx80">;
755 defm : ImageStoreDataPatterns(opcode # _V1), f16, 1>;
794756 } // End HasUnpackedD16VMem.
795757
796758 let SubtargetPredicate = HasPackedD16VMem in {
797 defm : ImageStoreDataPatterns(opcode # _V1), f16, "_D16">;
798 defm : ImageStoreDataPatterns(opcode # _V1), v2f16, "_D16">;
799 defm : ImageStoreDataPatterns(opcode # _V2), v4f16, "_D16">;
759 defm : ImageStoreDataPatterns(opcode # _V1), f16, 1>;
760 defm : ImageStoreDataPatterns(opcode # _V1), v2f16, 1>;
761 defm : ImageStoreDataPatterns(opcode # _V2), v4f16, 1>;
800762 } // End HasPackedD16VMem.
801763 }
802764
803765 // ImageStore alternative patterns.
804766 multiclass ImageStoreAltPatterns {
805767 let SubtargetPredicate = HasUnpackedD16VMem in {
806 defm : ImageStoreDataPatterns(opcode # _V2), v2i32, "_D16_gfx80">;
807 defm : ImageStoreDataPatterns(opcode # _V4), v4i32, "_D16_gfx80">;
768 defm : ImageStoreDataPatterns(opcode # _V2), v2i32, 1>;
769 defm : ImageStoreDataPatterns(opcode # _V4), v4i32, 1>;
808770 } // End HasUnpackedD16VMem.
809771
810772 let SubtargetPredicate = HasPackedD16VMem in {
811 defm : ImageStoreDataPatterns(opcode # _V1), i32, "_D16">;
812 defm : ImageStoreDataPatterns(opcode # _V2), v2i32, "_D16">;
773 defm : ImageStoreDataPatterns(opcode # _V1), i32, 1>;
774 defm : ImageStoreDataPatterns(opcode # _V2), v2i32, 1>;
813775 } // End HasPackedD16VMem.
814776 }
815777
1029991 defm : ImageAtomicPatterns;
1030992 defm : ImageAtomicPatterns;
1031993 defm : ImageAtomicPatterns;
1032
1033 /* SIsample for simple 1D texture lookup */
1034 def : GCNPat <
1035 (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
1036 (IMAGE_SAMPLE_V4_V1 $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
1037 >;
1038
1039 class SamplePattern : GCNPat <
1040 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
1041 (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
1042 >;
1043
1044 class SampleRectPattern : GCNPat <
1045 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT),
1046 (opcode $addr, $rsrc, $sampler, 0xf, 1, 0, 0, 0, 0, 0, 0)
1047 >;
1048
1049 class SampleArrayPattern : GCNPat <
1050 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY),
1051 (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
1052 >;
1053
1054 class SampleShadowPattern
1055 ValueType vt> : GCNPat <
1056 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW),
1057 (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
1058 >;
1059
1060 class SampleShadowArrayPattern
1061 ValueType vt> : GCNPat <
1062 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
1063 (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
1064 >;
1065
1066 /* SIsample* for texture lookups consuming more address parameters */
1067 multiclass SamplePatterns
1068 MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
1069 MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
1070 def : SamplePattern ;
1071 def : SampleRectPattern ;
1072 def : SampleArrayPattern ;
1073 def : SampleShadowPattern ;
1074 def : SampleShadowArrayPattern ;
1075
1076 def : SamplePattern ;
1077 def : SampleArrayPattern ;
1078 def : SampleShadowPattern ;
1079 def : SampleShadowArrayPattern ;
1080
1081 def : SamplePattern ;
1082 def : SampleArrayPattern ;
1083 def : SampleShadowPattern ;
1084 def : SampleShadowArrayPattern ;
1085
1086 def : SamplePattern ;
1087 def : SampleArrayPattern ;
1088 def : SampleShadowPattern ;
1089 def : SampleShadowArrayPattern ;
1090 }
1091
1092 defm : SamplePatterns
1093 IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
1094 IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
1095 IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
1096 v2i32>;
1097 defm : SamplePatterns
1098 IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
1099 IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
1100 IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
1101 v4i32>;
1102 defm : SamplePatterns
1103 IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
1104 IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
1105 IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
1106 v8i32>;
1107 defm : SamplePatterns
1108 IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
1109 IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
1110 IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
1111 v16i32>;
8686 // Is a packed VOP3P instruction.
8787 IsPacked = UINT64_C(1) << 49,
8888
89 // "d16" bit set or not.
90 D16 = UINT64_C(1) << 50
89 // Is a D16 buffer instruction.
90 D16Buf = UINT64_C(1) << 50
9191 };
9292
9393 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
77617761 /// Adjust the writemask of MIMG instructions
77627762 SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
77637763 SelectionDAG &DAG) const {
7764 unsigned Opcode = Node->getMachineOpcode();
7765
7766 // Subtract 1 because the vdata output is not a MachineSDNode operand.
7767 int D16Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::d16) - 1;
7768 if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
7769 return Node; // not implemented for D16
7770
77647771 SDNode *Users[4] = { nullptr };
77657772 unsigned Lane = 0;
7766 unsigned DmaskIdx = (Node->getNumOperands() - Node->getNumValues() == 9) ? 2 : 3;
7773 unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
77677774 unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
77687775 unsigned NewDmask = 0;
77697776 bool HasChain = Node->getNumValues() > 1;
79357942 unsigned Opcode = Node->getMachineOpcode();
79367943
79377944 if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
7938 !TII->isGather4(Opcode) && !TII->isD16(Opcode)) {
7945 !TII->isGather4(Opcode)) {
79397946 return adjustWritemask(Node, DAG);
79407947 }
79417948
117117 // This bit indicates that this is a packed VOP3P instruction
118118 field bit IsPacked = 0;
119119
120 // This bit indicates that this is a D16 instruction.
121 field bit D16 = 0;
120 // This bit indicates that this is a D16 buffer instruction.
121 field bit D16Buf = 0;
122122
123123 // These need to be kept in sync with the enum in SIInstrFlags.
124124 let TSFlags{0} = SALU;
175175
176176 let TSFlags{49} = IsPacked;
177177
178 let TSFlags{50} = D16;
178 let TSFlags{50} = D16Buf;
179179
180180 let SchedRW = [Write32Bit];
181181
254254 bits<1> tfe;
255255 bits<1> lwe;
256256 bits<1> slc;
257 bits<1> d16 = 0;
257 bit d16;
258258 bits<8> vaddr;
259259 bits<7> srsrc;
260260 bits<7> ssamp;
343343
344344 let UseNamedOperandTable = 1;
345345 let hasSideEffects = 0; // XXX ????
346 }
346
347 bit HasD16 = 0;
348 }
444444 return get(Opcode).TSFlags & SIInstrFlags::Gather4;
445445 }
446446
447 static bool isD16(const MachineInstr &MI) {
448 return MI.getDesc().TSFlags & SIInstrFlags::D16;
449 }
450
451 bool isD16(uint16_t Opcode) const {
452 return get(Opcode).TSFlags & SIInstrFlags::D16;
453 }
454
455447 static bool isFLAT(const MachineInstr &MI) {
456448 return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
457449 }
298298 def SIImage_gather4_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_O">;
299299 def SIImage_gather4_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL_O">;
300300 def SIImage_gather4_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ_O">;
301
302 class SDSample : SDNode
303 SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
304 SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
305 >;
306
307 def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
308 def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
309 def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
310 def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
311301
312302 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
313303 SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
20782068 let ValueCols = [["1"]];
20792069 }
20802070
2071 def getMIMGGatherOpPackedD16 : InstrMapping {
2072 let FilterClass = "MIMG_Gather_Size";
2073 let RowFields = ["Op"];
2074 let ColFields = ["Channels"];
2075 let KeyCol = ["4"];
2076 let ValueCols = [["2"]];
2077 }
2078
20812079 // Maps a commuted opcode to its original version
20822080 def getCommuteOrig : InstrMapping {
20832081 let FilterClass = "Commutable_REV";
165165 unsigned Opc, unsigned NewChannels);
166166
167167 LLVM_READONLY
168 int getMIMGGatherOpPackedD16(uint16_t Opcode);
169
170 LLVM_READONLY
168171 int getMCOpcode(uint16_t Opcode, unsigned Gen);
169172
170173 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
6060 %11.sub6 = COPY %1
6161 %11.sub7 = COPY %1
6262 %11.sub8 = COPY %1
63 dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
63 dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
6464 %20.sub1 = COPY %2
6565 %20.sub2 = COPY %2
6666 %20.sub3 = COPY %2
6969 %20.sub6 = COPY %2
7070 %20.sub7 = COPY %2
7171 %20.sub8 = COPY %2
72 dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
72 dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
7373
7474 ...
304304
305305 # GCN-LABEL: {{^}}name: image_clause{{$}}
306306 # GCN: early-clobber %4:vreg_128, early-clobber %3:vreg_128, early-clobber %5:vreg_128 = BUNDLE %0, undef %2:sreg_128, %1, implicit $exec {
307 # GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
308 # GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
309 # GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
310 # GCN-NEXT: }
311 # GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
307 # GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
308 # GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
309 # GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
310 # GCN-NEXT: }
311 # GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
312312
313313 ---
314314 name: image_clause
324324 bb.0:
325325 %0 = IMPLICIT_DEF
326326 %1 = IMPLICIT_DEF
327 %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
328 %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
329 %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
330 IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
331 IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
332 IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
327 %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
328 %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
329 %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
330 IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
331 IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
332 IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
333333 ...
334334
335335 # GCN-LABEL: {{^}}name: mixed_clause{{$}}
336336 # GCN: dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec {
337 # GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
337 # GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
338338 # GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
339339 # GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
340340 # GCN-NEXT: }
354354 %0 = IMPLICIT_DEF
355355 %1 = IMPLICIT_DEF
356356 %2 = IMPLICIT_DEF
357 %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
357 %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
358358 %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
359359 %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
360360 ...
355355 // GCN: image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x8 ; encoding: [0x00,0x08,0x00,0xf1,0x01,0x05,0x62,0x00]
356356
357357 image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16
358 // NOSICI: error: instruction not supported on this GPU
358 // NOSICI: error: d16 modifier is not supported on this GPU
359359 // GFX8_0: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
360 // NOGFX8_1: error: instruction not supported on this GPU
361 // NOGFX9: error: instruction not supported on this GPU
360 // NOGFX8_1: error: image data size does not match dmask and tfe
361 // NOGFX9: error: image data size does not match dmask and tfe
362362
363363 image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16
364364 // NOSICI: error: d16 modifier is not supported on this GPU
365 // NOGFX8_0: error: instruction not supported on this GPU
365 // NOGFX8_0: error: image data size does not match dmask and tfe
366366 // GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
367367 // GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
368368
369 // FIXME: d16 is handled as an optional modifier, should it be corrected?
370369 image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1
371 // NOSICI: error: d16 modifier is not supported on this GPU
372 // NOGFX8_0: error: instruction not supported on this GPU
373 // GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
374 // GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
370 // NOSICI: error: image data size does not match dmask and tfe
371 // NOGFX8_0: error: image data size does not match dmask and tfe
372 // NOGFX8_1: error: image data size does not match dmask and tfe
373 // NOGFX9: error: image data size does not match dmask and tfe